R tutorial

R Markdown

R tutorial document

#install.packages("ggpubr")

x = 3
print(x)

## [1] 3

name = "Pranali"
age = 27

cat("My name is", name, "and my age is", age)

## My name is Pranali and my age is 27

# Create a vector.
apple = c("red", "green", "yellow")
print(apple)

## [1] "red"    "green"  "yellow"

#Length of the vector
length(apple)

## [1] 3

#Access values from a vector
print(apple[3])

## [1] "yellow"

# Get the class of the vector.
print(class(apple))

## [1] "character"

# Create a list.
list1 = list(c(2, 5, 3), 21.3, sin)
# Print the list.
print(list1)

## [[1]]
## [1] 2 5 3
## 
## [[2]]
## [1] 21.3
## 
## [[3]]
## function (x)  .Primitive("sin")

# Create a matrix.
M = matrix(c("a", "a", "b", "c", "b", "a"), nrow=2, ncol=3, byrow = TRUE)
print(M)

##      [,1] [,2] [,3]
## [1,] "a"  "a"  "b" 
## [2,] "c"  "b"  "a"

# Create a vector.
apple_colors = c("green", "green", "yellow", "red", "red", "red", "green")
# Create a factor object.
factor_apple = factor(apple_colors)
# Print the factor.
print(factor_apple)

## [1] green  green  yellow red    red    red    green 
## Levels: green red yellow

print(nlevels(factor_apple))

## [1] 3

# Create the data frame.
BMI = data.frame(gender = c("Male", "Male" ,"Female"),
                 height = c(152, 171.5, 165),
                 weight = c(81, 93, 78),
                 Age = c(42, 38, 26))
print(BMI)

##   gender height weight Age
## 1   Male  152.0     81  42
## 2   Male  171.5     93  38
## 3 Female  165.0     78  26

#Add two vectors
v = c(2, 5.5, 6)
t = c(8, 3, 4)
print(v+t)

## [1] 10.0  8.5 10.0

#Subtract two vectors
v = c(2, 5.5, 6)
t = c(8, 3, 4)
print(v-t)

## [1] -6.0  2.5  2.0

#Check if each element of the first vector is greater than the corresponding element of the second vector.
v = c(2, 5.5, 6, 9)
t = c(8, 2.5, 14, 9)
print(v>t)

## [1] FALSE  TRUE FALSE FALSE

#Checks if each element of the first vector is equal to the corresponding element of the second vector.
v = c(2, 5.5, 6, 9)
t = c(8, 2.5, 14, 9)
print(v==t)

## [1] FALSE FALSE FALSE  TRUE

##Miscellaneous Operators
# ':' Colon operator. It creates the series of numbers in sequence for a vector.
v = 2:8
print(v)

## [1] 2 3 4 5 6 7 8

# '%' This operator is used to identify if an element belongs to a vector.
v1 = 8
v2 = 12
t = 1:10
print(v1 %in% t)

## [1] TRUE

print(v2 %in% t)

## [1] FALSE

#Subset function
a = 1:50
b = subset(a, a > 40)
c = which(a > 40)

print(b)

##  [1] 41 42 43 44 45 46 47 48 49 50

print(a[c])

##  [1] 41 42 43 44 45 46 47 48 49 50

#grep function
genes = c("TOP2A", "EGFR", "BIRC5", "NCAM1")
print(grep("EGFR", genes))

## [1] 2

a = 1:10
b = 11:20
c = c(-1:-10)
plot(a, b)

plot(a, b, pch = 20)

#Symbols for all the shapes in R
ggpubr::show_point_shapes()

## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

plot(a, c, col = "red", pch = 18)

plot(a, c, col = "red", pch = 18, cex = 5)

plot(a, c, col = "red", pch = 18, cex = 5)
plot(a, c, col = "red", pch = 18, cex = 5, xlab = "Exercise hours", ylab = "Weight")

#Open csv file
#Get working directory
getwd()

## [1] "/home/pranali/Documents/Workshop"

#Set working directory
setwd("/home/pranali/Documents/Workshop/")
getwd()

## [1] "/home/pranali/Documents/Workshop"

#Check if the files exists in your directory
file.exists("data.csv")

## [1] TRUE

#Read the csv file
data = read.csv("data.csv")
print(data)

##      genes C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## 1   TRIM71    8598.6     732.3   13329.0   23509.8    4309.7    6711.0
## 2     CCR4    1696.6    2397.5    4007.5    2133.9    3879.0   24998.2
## 3     GLB1  207756.9  248245.7  267161.2  325876.3  280285.0  301038.7
## 4    TMPPE   73554.0   77911.5   75838.4   65221.4  106484.2   67498.3
## 5    CRTAP  320777.9  570465.4  310077.5  558084.9  522672.7  363715.3
## 6    SUSD5   37396.4    2222.5   68830.4   33078.6   23267.9    3192.8
## 7    FBXL2   50319.4    1106.5   34255.3   93497.6   30316.4   44149.6
## 8     UBP1  155960.1  154767.8  131105.2  142833.6  135960.8  142266.8
## 9   CLASP2  471379.0  299606.9  425861.3  487867.4  324242.8  294769.9
## 10   MATN1    6966.7    4602.8    2468.4    1672.8    2606.5    1007.9
## 11 PDCD6IP  475110.5  455653.1  373216.0  377236.8  362405.8  349209.7
## 12  ARPP21   59372.1   12446.8   35077.8   21922.7   59691.0   21772.6
## 13    STAC   99406.2  272481.1   18574.6   23035.9   87232.6   27573.3
## 14   DCLK3     971.3    1862.8    9559.7    1018.0    8491.3    8491.7

#Print the dimensions of the data
dim(data)

## [1] 14  7

nrow(data)

## [1] 14

ncol(data)

## [1] 7

rownames(data) = data[,1]
print(data)

##           genes C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71   TRIM71    8598.6     732.3   13329.0   23509.8    4309.7    6711.0
## CCR4       CCR4    1696.6    2397.5    4007.5    2133.9    3879.0   24998.2
## GLB1       GLB1  207756.9  248245.7  267161.2  325876.3  280285.0  301038.7
## TMPPE     TMPPE   73554.0   77911.5   75838.4   65221.4  106484.2   67498.3
## CRTAP     CRTAP  320777.9  570465.4  310077.5  558084.9  522672.7  363715.3
## SUSD5     SUSD5   37396.4    2222.5   68830.4   33078.6   23267.9    3192.8
## FBXL2     FBXL2   50319.4    1106.5   34255.3   93497.6   30316.4   44149.6
## UBP1       UBP1  155960.1  154767.8  131105.2  142833.6  135960.8  142266.8
## CLASP2   CLASP2  471379.0  299606.9  425861.3  487867.4  324242.8  294769.9
## MATN1     MATN1    6966.7    4602.8    2468.4    1672.8    2606.5    1007.9
## PDCD6IP PDCD6IP  475110.5  455653.1  373216.0  377236.8  362405.8  349209.7
## ARPP21   ARPP21   59372.1   12446.8   35077.8   21922.7   59691.0   21772.6
## STAC       STAC   99406.2  272481.1   18574.6   23035.9   87232.6   27573.3
## DCLK3     DCLK3     971.3    1862.8    9559.7    1018.0    8491.3    8491.7

data = data[,-1]
print(data)

##         C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71     8598.6     732.3   13329.0   23509.8    4309.7    6711.0
## CCR4       1696.6    2397.5    4007.5    2133.9    3879.0   24998.2
## GLB1     207756.9  248245.7  267161.2  325876.3  280285.0  301038.7
## TMPPE     73554.0   77911.5   75838.4   65221.4  106484.2   67498.3
## CRTAP    320777.9  570465.4  310077.5  558084.9  522672.7  363715.3
## SUSD5     37396.4    2222.5   68830.4   33078.6   23267.9    3192.8
## FBXL2     50319.4    1106.5   34255.3   93497.6   30316.4   44149.6
## UBP1     155960.1  154767.8  131105.2  142833.6  135960.8  142266.8
## CLASP2   471379.0  299606.9  425861.3  487867.4  324242.8  294769.9
## MATN1      6966.7    4602.8    2468.4    1672.8    2606.5    1007.9
## PDCD6IP  475110.5  455653.1  373216.0  377236.8  362405.8  349209.7
## ARPP21    59372.1   12446.8   35077.8   21922.7   59691.0   21772.6
## STAC      99406.2  272481.1   18574.6   23035.9   87232.6   27573.3
## DCLK3       971.3    1862.8    9559.7    1018.0    8491.3    8491.7

#Access the data object using indices/rownames/colnames
print(data["TRIM71", ])

##        C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71    8598.6     732.3     13329   23509.8    4309.7      6711

print(data[1, ])

##        C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71    8598.6     732.3     13329   23509.8    4309.7      6711

print(data[, "C3L.00104"])

##  [1]   8598.6   1696.6 207756.9  73554.0 320777.9  37396.4  50319.4 155960.1
##  [9] 471379.0   6966.7 475110.5  59372.1  99406.2    971.3

print(data[, 1])

##  [1]   8598.6   1696.6 207756.9  73554.0 320777.9  37396.4  50319.4 155960.1
##  [9] 471379.0   6966.7 475110.5  59372.1  99406.2    971.3

#base statistics calculation
n1 = max(data[, 1])
print(n1)

## [1] 475110.5

n2 = min(data[,1])
print(n2)

## [1] 971.3

m1 = mean(data[, 1])
print(m1)

## [1] 140661.8

m2 = median(data[, 1])
print(m2)

## [1] 66463.05

var1 = c(6, 3, 9, 6, 6, 5, 9, 3)
m3 = as.data.frame(table(var1))
print(m3)

##   var1 Freq
## 1    3    2
## 2    5    1
## 3    6    3
## 4    9    2

summary(var1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.500   6.000   5.875   6.750   9.000

a = c(1,2,3,4,5,7,8,9,10)
b = c(2,11,15,4,19,20)

intersect(a, b)

## [1] 2 4

union(a, b)

##  [1]  1  2  3  4  5  7  8  9 10 11 15 19 20

#values present in a but absent in b
setdiff(a, b)

## [1]  1  3  5  7  8  9 10

#values present in b but absent in a
setdiff(b, a)

## [1] 11 15 19 20

#basic for loop
for(i in 1:10){
  print(i * 2)
}

## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20

#for loop with if condition
for(i in 1:10){
  c = i * 2
  if(c > 4){
    print(c)
  }
}

## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20

#for loop with if condition and a logical operator
for(i in 1:10){
  c = i * 2
  if((c > 4) & (c < 8)){
    print(c)
  }
}

## [1] 6

for(i in 1:10){
  c = i * 2
  if((c > 4) | (c < 8)){
    print(c)
  }
}

## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20

#install.packages("ggplot2")
library(ggplot2)
library(ggpubr)

info = read.csv("sampleInfo.csv")
rownames(info) = info[,1]
head(info)

##             CGGA_ID  PRS_type Histology  Grade Gender Age Delay Event Radiation
## CGGA_1095 CGGA_1095      <NA>      <NA> WHO IV   male  29  2778     1         1
## CGGA_309   CGGA_309      <NA>      <NA> WHO IV   male  44   139     1         1
## CGGA_495   CGGA_495      <NA>      <NA> WHO IV   male  53   295     1         0
## CGGA_681   CGGA_681      <NA>      <NA> WHO IV   male  42   110     1        NA
## CGGA_D49   CGGA_D49 Recurrent      <NA> WHO IV Female  32    NA    NA        NA
## CGGA_1003 CGGA_1003   Primary         O WHO II Female  47  3817     0         0
##           Chemo IDH_mutation_status X1p19q_codeletion_status
## CGGA_1095     1              Mutant                Non-codel
## CGGA_309      1            Wildtype                     <NA>
## CGGA_495      0            Wildtype                     <NA>
## CGGA_681     NA            Wildtype                     <NA>
## CGGA_D49     NA              Mutant                    Codel
## CGGA_1003     1              Mutant                    Codel
##           MGMTp_methylation_status
## CGGA_1095               methylated
## CGGA_309             un-methylated
## CGGA_495             un-methylated
## CGGA_681                methylated
## CGGA_D49                      <NA>
## CGGA_1003            un-methylated

data = read.csv("input.csv")
rownames(data) = data[,1]
data = data[, -1]

#head(data)
data[1:5, 1:5]

##        CGGA_1002 CGGA_1003 CGGA_1010 CGGA_1012 CGGA_1014
## BIRC5   3.437035  1.331598 0.2926557  3.529216  3.278858
## EGFR    1.518949  4.955727 1.3641526  3.685498  5.819525
## OLIG2   6.404582  6.886588 3.1388798  6.929365  5.779488
## PTPRZ1  6.977530  7.811640 1.5495014  7.375248  8.457647
## TOP2A   5.113039  4.669319 0.6558055  4.937491  5.806693

identical(colnames(data), rownames(info))

## [1] FALSE

#order the matrix using colnames
data = data[, order(colnames(data))]

#order the matrix using rownames
info = info[order(rownames(info)), ]
identical(colnames(data), rownames(info))

## [1] TRUE

#column bind two objects
mat = cbind.data.frame(info, t(data[1,]))
head(mat)

##             CGGA_ID PRS_type Histology   Grade Gender Age Delay Event Radiation
## CGGA_1001 CGGA_1001  Primary       GBM  WHO IV   Male  11  3817     0         0
## CGGA_1002 CGGA_1002  Primary        AA WHO III Female  43   305     1         1
## CGGA_1003 CGGA_1003  Primary         O  WHO II Female  47  3817     0         0
## CGGA_1004 CGGA_1004  Primary         A  WHO II Female  63   899     1         1
## CGGA_1005 CGGA_1005  Primary         A  WHO II Female  41  3815     0         0
## CGGA_1006 CGGA_1006  Primary        AA WHO III   Male  42   254     1         1
##           Chemo IDH_mutation_status X1p19q_codeletion_status
## CGGA_1001     1            Wildtype                Non-codel
## CGGA_1002     1            Wildtype                Non-codel
## CGGA_1003     1              Mutant                    Codel
## CGGA_1004     0            Wildtype                Non-codel
## CGGA_1005     1              Mutant                Non-codel
## CGGA_1006     1            Wildtype                Non-codel
##           MGMTp_methylation_status     BIRC5
## CGGA_1001            un-methylated 1.1799152
## CGGA_1002               methylated 3.4370352
## CGGA_1003            un-methylated 1.3315983
## CGGA_1004               methylated 0.8866869
## CGGA_1005               methylated 2.9571486
## CGGA_1006            un-methylated 4.4286000

#basic boxplot
ggplot(mat, aes(x = Grade, y = BIRC5)) + geom_boxplot()

#Color the boxes
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot()

#Make the background white
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic()

#Add pvalues using anova
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic() + stat_compare_means(method = "anova")

my_comparisons = list(c("WHO II", "WHO III"), c("WHO II", "WHO IV"), c("WHO III", "WHO IV"))

#Add multiple pvalues
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic() + stat_compare_means(comparisons = my_comparisons)

#Add labels
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression")

ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression") + xlab("Glioma grades") + ylab("Log2 CPM expression")

#Remove legend
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression") + xlab("Glioma grades") + ylab("Log2 CPM expression") + theme(legend.position = "none")

#Store the images
png("BIRC5.png")
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() +  theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression") + xlab("Glioma grades") + ylab("Log2 CPM expression") + theme(legend.position = "none")
dev.off()

## png 
##   2

A caption

a = c("Normal", "Normal", "Tumor", "Normal", "Tumor", "Tumor")
b = c(10, 20, 50, 15, 65, 58)

t.test(b ~ a, paired = T, alternative = "two.sided")

## 
##  Paired t-test
## 
## data:  b by a
## t = -29.365, df = 2, p-value = 0.001158
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -48.91828 -36.41506
## sample estimates:
## mean of the differences 
##               -42.66667

t.test(b ~ a, paired = F, alternative = "two.sided")

## 
##  Welch Two Sample t-test
## 
## data:  b by a
## t = -8.1944, df = 3.4831, p-value = 0.002142
## alternative hypothesis: true difference in means between group Normal and group Tumor is not equal to 0
## 95 percent confidence interval:
##  -58.00993 -27.32340
## sample estimates:
## mean in group Normal  mean in group Tumor 
##             15.00000             57.66667

t.test(b ~ a, paired = F, alternative = "greater")

## 
##  Welch Two Sample t-test
## 
## data:  b by a
## t = -8.1944, df = 3.4831, p-value = 0.9989
## alternative hypothesis: true difference in means between group Normal and group Tumor is greater than 0
## 95 percent confidence interval:
##  -54.25752       Inf
## sample estimates:
## mean in group Normal  mean in group Tumor 
##             15.00000             57.66667

t.test(b ~ a, paired = F, alternative = "less")

## 
##  Welch Two Sample t-test
## 
## data:  b by a
## t = -8.1944, df = 3.4831, p-value = 0.001071
## alternative hypothesis: true difference in means between group Normal and group Tumor is less than 0
## 95 percent confidence interval:
##       -Inf -31.07581
## sample estimates:
## mean in group Normal  mean in group Tumor 
##             15.00000             57.66667

a = c("Normal", "Normal", "ATumor", "Normal", "ATumor", "ATumor")
t.test(b ~ a, paired = F, alternative = "greater")

## 
##  Welch Two Sample t-test
## 
## data:  b by a
## t = 8.1944, df = 3.4831, p-value = 0.001071
## alternative hypothesis: true difference in means between group ATumor and group Normal is greater than 0
## 95 percent confidence interval:
##  31.07581      Inf
## sample estimates:
## mean in group ATumor mean in group Normal 
##             57.66667             15.00000

res = t.test(b ~ a, paired = T, alternative = "two.sided")


data = PlantGrowth
data

##    weight group
## 1    4.17  ctrl
## 2    5.58  ctrl
## 3    5.18  ctrl
## 4    6.11  ctrl
## 5    4.50  ctrl
## 6    4.61  ctrl
## 7    5.17  ctrl
## 8    4.53  ctrl
## 9    5.33  ctrl
## 10   5.14  ctrl
## 11   4.81  trt1
## 12   4.17  trt1
## 13   4.41  trt1
## 14   3.59  trt1
## 15   5.87  trt1
## 16   3.83  trt1
## 17   6.03  trt1
## 18   4.89  trt1
## 19   4.32  trt1
## 20   4.69  trt1
## 21   6.31  trt2
## 22   5.12  trt2
## 23   5.54  trt2
## 24   5.50  trt2
## 25   5.37  trt2
## 26   5.29  trt2
## 27   4.92  trt2
## 28   6.15  trt2
## 29   5.80  trt2
## 30   5.26  trt2

# Show the levels
levels(data$group)

## [1] "ctrl" "trt1" "trt2"

#Order the data by levels
data$group = ordered(data$group, levels = c("ctrl", "trt1", "trt2"))

#install.packages("dplyr")
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#calculate group statistics
group_by(data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )

## # A tibble: 3 x 4
##   group count  mean    sd
##   <ord> <int> <dbl> <dbl>
## 1 ctrl     10  5.03 0.583
## 2 trt1     10  4.66 0.794
## 3 trt2     10  5.53 0.443

res.aov = aov(weight ~ group, data = data)
res.aov

## Call:
##    aov(formula = weight ~ group, data = data)
## 
## Terms:
##                    group Residuals
## Sum of Squares   3.76634  10.49209
## Deg. of Freedom        2        27
## 
## Residual standard error: 0.6233746
## Estimated effects are balanced

summary(res.aov)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        2  3.766  1.8832   4.846 0.0159 *
## Residuals   27 10.492  0.3886                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The rest of the values in the output table describe the independent variable and the residuals:

The Df column displays the degrees of freedom for the independent variable (the number of levels in the variable minus 1), and the degrees of freedom for the residuals (the total number of observations minus one and minus the number of levels in the independent variables).
The Sum Sq column displays the sum of squares (a.k.a. the total variation between the group means and the overall mean).
The Mean Sq column is the mean of the sum of squares, calculated by dividing the sum of squares by the degrees of freedom for each parameter.
The F-value column is the test statistic from the F test. This is the mean square of each independent variable divided by the mean square of the residuals. The larger the F value, the more likely it is that the variation caused by the independent variable is real and not due to chance.
The Pr(>F) column is the p-value of the F-statistic. This shows how likely it is that the F-value calculated from the test would have occurred if the null hypothesis of no difference among group means were true.

TukeyHSD(res.aov)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight ~ group, data = data)
## 
## $group
##             diff        lwr       upr     p adj
## trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
## trt2-ctrl  0.494 -0.1972161 1.1852161 0.1979960
## trt2-trt1  0.865  0.1737839 1.5562161 0.0120064

#Correlation in R
#install.packages("corrplot")
library(corrplot)

## corrplot 0.89 loaded

head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

M = cor(mtcars)
head(M)

##             mpg        cyl       disp         hp       drat         wt
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.6811719 -0.8676594
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.6999381  0.7824958
## disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.7102139  0.8879799
## hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.4487591  0.6587479
## drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.0000000 -0.7124406
## wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.7124406  1.0000000
##             qsec         vs         am       gear       carb
## mpg   0.41868403  0.6640389  0.5998324  0.4802848 -0.5509251
## cyl  -0.59124207 -0.8108118 -0.5226070 -0.4926866  0.5269883
## disp -0.43369788 -0.7104159 -0.5912270 -0.5555692  0.3949769
## hp   -0.70822339 -0.7230967 -0.2432043 -0.1257043  0.7498125
## drat  0.09120476  0.4402785  0.7127111  0.6996101 -0.0907898
## wt   -0.17471588 -0.5549157 -0.6924953 -0.5832870  0.4276059

corrplot(M, method="circle")

corrplot(M, method="color")

corrplot(M, method="number")

corrplot(M, type="upper")

#Function for calculation pvalue for each correlation

cor.mtest <- function(mat, ...) {
    mat <- as.matrix(mat)
    n <- ncol(mat)
    p.mat<- matrix(NA, n, n)
    diag(p.mat) <- 0
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            tmp <- cor.test(mat[, i], mat[, j], ...)
            p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
        }
    }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}
# matrix of the p-value of the correlation
p.mat = cor.mtest(mtcars)

corrplot(M, type="upper", p.mat = p.mat, sig.level = 0.01)

corrplot(M, type="upper", p.mat = p.mat, sig.level = 0.01, insig = "blank")

R tutorial

Pranali

19/06/2021

R Markdown