R tutorial document
#install.packages("ggpubr")
x = 3
print(x)
## [1] 3
name = "Pranali"
age = 27
cat("My name is", name, "and my age is", age)
## My name is Pranali and my age is 27
# Create a vector.
apple = c("red", "green", "yellow")
print(apple)
## [1] "red" "green" "yellow"
#Length of the vector
length(apple)
## [1] 3
#Access values from a vector
print(apple[3])
## [1] "yellow"
# Get the class of the vector.
print(class(apple))
## [1] "character"
# Create a list.
list1 = list(c(2, 5, 3), 21.3, sin)
# Print the list.
print(list1)
## [[1]]
## [1] 2 5 3
##
## [[2]]
## [1] 21.3
##
## [[3]]
## function (x) .Primitive("sin")
# Create a matrix.
M = matrix(c("a", "a", "b", "c", "b", "a"), nrow=2, ncol=3, byrow = TRUE)
print(M)
## [,1] [,2] [,3]
## [1,] "a" "a" "b"
## [2,] "c" "b" "a"
# Create a vector.
apple_colors = c("green", "green", "yellow", "red", "red", "red", "green")
# Create a factor object.
factor_apple = factor(apple_colors)
# Print the factor.
print(factor_apple)
## [1] green green yellow red red red green
## Levels: green red yellow
print(nlevels(factor_apple))
## [1] 3
# Create the data frame.
BMI = data.frame(gender = c("Male", "Male" ,"Female"),
height = c(152, 171.5, 165),
weight = c(81, 93, 78),
Age = c(42, 38, 26))
print(BMI)
## gender height weight Age
## 1 Male 152.0 81 42
## 2 Male 171.5 93 38
## 3 Female 165.0 78 26
#Add two vectors
v = c(2, 5.5, 6)
t = c(8, 3, 4)
print(v+t)
## [1] 10.0 8.5 10.0
#Subtract two vectors
v = c(2, 5.5, 6)
t = c(8, 3, 4)
print(v-t)
## [1] -6.0 2.5 2.0
#Check if each element of the first vector is greater than the corresponding element of the second vector.
v = c(2, 5.5, 6, 9)
t = c(8, 2.5, 14, 9)
print(v>t)
## [1] FALSE TRUE FALSE FALSE
#Checks if each element of the first vector is equal to the corresponding element of the second vector.
v = c(2, 5.5, 6, 9)
t = c(8, 2.5, 14, 9)
print(v==t)
## [1] FALSE FALSE FALSE TRUE
##Miscellaneous Operators
# ':' Colon operator. It creates the series of numbers in sequence for a vector.
v = 2:8
print(v)
## [1] 2 3 4 5 6 7 8
# '%' This operator is used to identify if an element belongs to a vector.
v1 = 8
v2 = 12
t = 1:10
print(v1 %in% t)
## [1] TRUE
print(v2 %in% t)
## [1] FALSE
#Subset function
a = 1:50
b = subset(a, a > 40)
c = which(a > 40)
print(b)
## [1] 41 42 43 44 45 46 47 48 49 50
print(a[c])
## [1] 41 42 43 44 45 46 47 48 49 50
#grep function
genes = c("TOP2A", "EGFR", "BIRC5", "NCAM1")
print(grep("EGFR", genes))
## [1] 2
a = 1:10
b = 11:20
c = c(-1:-10)
plot(a, b)
plot(a, b, pch = 20)
#Symbols for all the shapes in R
ggpubr::show_point_shapes()
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
plot(a, c, col = "red", pch = 18)
plot(a, c, col = "red", pch = 18, cex = 5)
plot(a, c, col = "red", pch = 18, cex = 5)
plot(a, c, col = "red", pch = 18, cex = 5, xlab = "Exercise hours", ylab = "Weight")
#Open csv file
#Get working directory
getwd()
## [1] "/home/pranali/Documents/Workshop"
#Set working directory
setwd("/home/pranali/Documents/Workshop/")
getwd()
## [1] "/home/pranali/Documents/Workshop"
#Check if the files exists in your directory
file.exists("data.csv")
## [1] TRUE
#Read the csv file
data = read.csv("data.csv")
print(data)
## genes C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## 1 TRIM71 8598.6 732.3 13329.0 23509.8 4309.7 6711.0
## 2 CCR4 1696.6 2397.5 4007.5 2133.9 3879.0 24998.2
## 3 GLB1 207756.9 248245.7 267161.2 325876.3 280285.0 301038.7
## 4 TMPPE 73554.0 77911.5 75838.4 65221.4 106484.2 67498.3
## 5 CRTAP 320777.9 570465.4 310077.5 558084.9 522672.7 363715.3
## 6 SUSD5 37396.4 2222.5 68830.4 33078.6 23267.9 3192.8
## 7 FBXL2 50319.4 1106.5 34255.3 93497.6 30316.4 44149.6
## 8 UBP1 155960.1 154767.8 131105.2 142833.6 135960.8 142266.8
## 9 CLASP2 471379.0 299606.9 425861.3 487867.4 324242.8 294769.9
## 10 MATN1 6966.7 4602.8 2468.4 1672.8 2606.5 1007.9
## 11 PDCD6IP 475110.5 455653.1 373216.0 377236.8 362405.8 349209.7
## 12 ARPP21 59372.1 12446.8 35077.8 21922.7 59691.0 21772.6
## 13 STAC 99406.2 272481.1 18574.6 23035.9 87232.6 27573.3
## 14 DCLK3 971.3 1862.8 9559.7 1018.0 8491.3 8491.7
#Print the dimensions of the data
dim(data)
## [1] 14 7
nrow(data)
## [1] 14
ncol(data)
## [1] 7
rownames(data) = data[,1]
print(data)
## genes C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71 TRIM71 8598.6 732.3 13329.0 23509.8 4309.7 6711.0
## CCR4 CCR4 1696.6 2397.5 4007.5 2133.9 3879.0 24998.2
## GLB1 GLB1 207756.9 248245.7 267161.2 325876.3 280285.0 301038.7
## TMPPE TMPPE 73554.0 77911.5 75838.4 65221.4 106484.2 67498.3
## CRTAP CRTAP 320777.9 570465.4 310077.5 558084.9 522672.7 363715.3
## SUSD5 SUSD5 37396.4 2222.5 68830.4 33078.6 23267.9 3192.8
## FBXL2 FBXL2 50319.4 1106.5 34255.3 93497.6 30316.4 44149.6
## UBP1 UBP1 155960.1 154767.8 131105.2 142833.6 135960.8 142266.8
## CLASP2 CLASP2 471379.0 299606.9 425861.3 487867.4 324242.8 294769.9
## MATN1 MATN1 6966.7 4602.8 2468.4 1672.8 2606.5 1007.9
## PDCD6IP PDCD6IP 475110.5 455653.1 373216.0 377236.8 362405.8 349209.7
## ARPP21 ARPP21 59372.1 12446.8 35077.8 21922.7 59691.0 21772.6
## STAC STAC 99406.2 272481.1 18574.6 23035.9 87232.6 27573.3
## DCLK3 DCLK3 971.3 1862.8 9559.7 1018.0 8491.3 8491.7
data = data[,-1]
print(data)
## C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71 8598.6 732.3 13329.0 23509.8 4309.7 6711.0
## CCR4 1696.6 2397.5 4007.5 2133.9 3879.0 24998.2
## GLB1 207756.9 248245.7 267161.2 325876.3 280285.0 301038.7
## TMPPE 73554.0 77911.5 75838.4 65221.4 106484.2 67498.3
## CRTAP 320777.9 570465.4 310077.5 558084.9 522672.7 363715.3
## SUSD5 37396.4 2222.5 68830.4 33078.6 23267.9 3192.8
## FBXL2 50319.4 1106.5 34255.3 93497.6 30316.4 44149.6
## UBP1 155960.1 154767.8 131105.2 142833.6 135960.8 142266.8
## CLASP2 471379.0 299606.9 425861.3 487867.4 324242.8 294769.9
## MATN1 6966.7 4602.8 2468.4 1672.8 2606.5 1007.9
## PDCD6IP 475110.5 455653.1 373216.0 377236.8 362405.8 349209.7
## ARPP21 59372.1 12446.8 35077.8 21922.7 59691.0 21772.6
## STAC 99406.2 272481.1 18574.6 23035.9 87232.6 27573.3
## DCLK3 971.3 1862.8 9559.7 1018.0 8491.3 8491.7
#Access the data object using indices/rownames/colnames
print(data["TRIM71", ])
## C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71 8598.6 732.3 13329 23509.8 4309.7 6711
print(data[1, ])
## C3L.00104 C3L.00365 C3L.00674 C3L.00677 C3L.01040 C3L.01043
## TRIM71 8598.6 732.3 13329 23509.8 4309.7 6711
print(data[, "C3L.00104"])
## [1] 8598.6 1696.6 207756.9 73554.0 320777.9 37396.4 50319.4 155960.1
## [9] 471379.0 6966.7 475110.5 59372.1 99406.2 971.3
print(data[, 1])
## [1] 8598.6 1696.6 207756.9 73554.0 320777.9 37396.4 50319.4 155960.1
## [9] 471379.0 6966.7 475110.5 59372.1 99406.2 971.3
#base statistics calculation
n1 = max(data[, 1])
print(n1)
## [1] 475110.5
n2 = min(data[,1])
print(n2)
## [1] 971.3
m1 = mean(data[, 1])
print(m1)
## [1] 140661.8
m2 = median(data[, 1])
print(m2)
## [1] 66463.05
var1 = c(6, 3, 9, 6, 6, 5, 9, 3)
m3 = as.data.frame(table(var1))
print(m3)
## var1 Freq
## 1 3 2
## 2 5 1
## 3 6 3
## 4 9 2
summary(var1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.500 6.000 5.875 6.750 9.000
a = c(1,2,3,4,5,7,8,9,10)
b = c(2,11,15,4,19,20)
intersect(a, b)
## [1] 2 4
union(a, b)
## [1] 1 2 3 4 5 7 8 9 10 11 15 19 20
#values present in a but absent in b
setdiff(a, b)
## [1] 1 3 5 7 8 9 10
#values present in b but absent in a
setdiff(b, a)
## [1] 11 15 19 20
#basic for loop
for(i in 1:10){
print(i * 2)
}
## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20
#for loop with if condition
for(i in 1:10){
c = i * 2
if(c > 4){
print(c)
}
}
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20
#for loop with if condition and a logical operator
for(i in 1:10){
c = i * 2
if((c > 4) & (c < 8)){
print(c)
}
}
## [1] 6
for(i in 1:10){
c = i * 2
if((c > 4) | (c < 8)){
print(c)
}
}
## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20
#install.packages("ggplot2")
library(ggplot2)
library(ggpubr)
info = read.csv("sampleInfo.csv")
rownames(info) = info[,1]
head(info)
## CGGA_ID PRS_type Histology Grade Gender Age Delay Event Radiation
## CGGA_1095 CGGA_1095 <NA> <NA> WHO IV male 29 2778 1 1
## CGGA_309 CGGA_309 <NA> <NA> WHO IV male 44 139 1 1
## CGGA_495 CGGA_495 <NA> <NA> WHO IV male 53 295 1 0
## CGGA_681 CGGA_681 <NA> <NA> WHO IV male 42 110 1 NA
## CGGA_D49 CGGA_D49 Recurrent <NA> WHO IV Female 32 NA NA NA
## CGGA_1003 CGGA_1003 Primary O WHO II Female 47 3817 0 0
## Chemo IDH_mutation_status X1p19q_codeletion_status
## CGGA_1095 1 Mutant Non-codel
## CGGA_309 1 Wildtype <NA>
## CGGA_495 0 Wildtype <NA>
## CGGA_681 NA Wildtype <NA>
## CGGA_D49 NA Mutant Codel
## CGGA_1003 1 Mutant Codel
## MGMTp_methylation_status
## CGGA_1095 methylated
## CGGA_309 un-methylated
## CGGA_495 un-methylated
## CGGA_681 methylated
## CGGA_D49 <NA>
## CGGA_1003 un-methylated
data = read.csv("input.csv")
rownames(data) = data[,1]
data = data[, -1]
#head(data)
data[1:5, 1:5]
## CGGA_1002 CGGA_1003 CGGA_1010 CGGA_1012 CGGA_1014
## BIRC5 3.437035 1.331598 0.2926557 3.529216 3.278858
## EGFR 1.518949 4.955727 1.3641526 3.685498 5.819525
## OLIG2 6.404582 6.886588 3.1388798 6.929365 5.779488
## PTPRZ1 6.977530 7.811640 1.5495014 7.375248 8.457647
## TOP2A 5.113039 4.669319 0.6558055 4.937491 5.806693
identical(colnames(data), rownames(info))
## [1] FALSE
#order the matrix using colnames
data = data[, order(colnames(data))]
#order the matrix using rownames
info = info[order(rownames(info)), ]
identical(colnames(data), rownames(info))
## [1] TRUE
#column bind two objects
mat = cbind.data.frame(info, t(data[1,]))
head(mat)
## CGGA_ID PRS_type Histology Grade Gender Age Delay Event Radiation
## CGGA_1001 CGGA_1001 Primary GBM WHO IV Male 11 3817 0 0
## CGGA_1002 CGGA_1002 Primary AA WHO III Female 43 305 1 1
## CGGA_1003 CGGA_1003 Primary O WHO II Female 47 3817 0 0
## CGGA_1004 CGGA_1004 Primary A WHO II Female 63 899 1 1
## CGGA_1005 CGGA_1005 Primary A WHO II Female 41 3815 0 0
## CGGA_1006 CGGA_1006 Primary AA WHO III Male 42 254 1 1
## Chemo IDH_mutation_status X1p19q_codeletion_status
## CGGA_1001 1 Wildtype Non-codel
## CGGA_1002 1 Wildtype Non-codel
## CGGA_1003 1 Mutant Codel
## CGGA_1004 0 Wildtype Non-codel
## CGGA_1005 1 Mutant Non-codel
## CGGA_1006 1 Wildtype Non-codel
## MGMTp_methylation_status BIRC5
## CGGA_1001 un-methylated 1.1799152
## CGGA_1002 methylated 3.4370352
## CGGA_1003 un-methylated 1.3315983
## CGGA_1004 methylated 0.8866869
## CGGA_1005 methylated 2.9571486
## CGGA_1006 un-methylated 4.4286000
#basic boxplot
ggplot(mat, aes(x = Grade, y = BIRC5)) + geom_boxplot()
#Color the boxes
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot()
#Make the background white
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic()
#Add pvalues using anova
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic() + stat_compare_means(method = "anova")
my_comparisons = list(c("WHO II", "WHO III"), c("WHO II", "WHO IV"), c("WHO III", "WHO IV"))
#Add multiple pvalues
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic() + stat_compare_means(comparisons = my_comparisons)
#Add labels
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression")
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression") + xlab("Glioma grades") + ylab("Log2 CPM expression")
#Remove legend
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression") + xlab("Glioma grades") + ylab("Log2 CPM expression") + theme(legend.position = "none")
#Store the images
png("BIRC5.png")
ggplot(mat, aes(x = Grade, y = BIRC5, fill = Grade)) + geom_boxplot() + theme_classic() + stat_compare_means(comparisons = my_comparisons) + ggtitle("BIRC5 gene expression") + xlab("Glioma grades") + ylab("Log2 CPM expression") + theme(legend.position = "none")
dev.off()
## png
## 2
A caption
a = c("Normal", "Normal", "Tumor", "Normal", "Tumor", "Tumor")
b = c(10, 20, 50, 15, 65, 58)
t.test(b ~ a, paired = T, alternative = "two.sided")
##
## Paired t-test
##
## data: b by a
## t = -29.365, df = 2, p-value = 0.001158
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -48.91828 -36.41506
## sample estimates:
## mean of the differences
## -42.66667
t.test(b ~ a, paired = F, alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: b by a
## t = -8.1944, df = 3.4831, p-value = 0.002142
## alternative hypothesis: true difference in means between group Normal and group Tumor is not equal to 0
## 95 percent confidence interval:
## -58.00993 -27.32340
## sample estimates:
## mean in group Normal mean in group Tumor
## 15.00000 57.66667
t.test(b ~ a, paired = F, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: b by a
## t = -8.1944, df = 3.4831, p-value = 0.9989
## alternative hypothesis: true difference in means between group Normal and group Tumor is greater than 0
## 95 percent confidence interval:
## -54.25752 Inf
## sample estimates:
## mean in group Normal mean in group Tumor
## 15.00000 57.66667
t.test(b ~ a, paired = F, alternative = "less")
##
## Welch Two Sample t-test
##
## data: b by a
## t = -8.1944, df = 3.4831, p-value = 0.001071
## alternative hypothesis: true difference in means between group Normal and group Tumor is less than 0
## 95 percent confidence interval:
## -Inf -31.07581
## sample estimates:
## mean in group Normal mean in group Tumor
## 15.00000 57.66667
a = c("Normal", "Normal", "ATumor", "Normal", "ATumor", "ATumor")
t.test(b ~ a, paired = F, alternative = "greater")
##
## Welch Two Sample t-test
##
## data: b by a
## t = 8.1944, df = 3.4831, p-value = 0.001071
## alternative hypothesis: true difference in means between group ATumor and group Normal is greater than 0
## 95 percent confidence interval:
## 31.07581 Inf
## sample estimates:
## mean in group ATumor mean in group Normal
## 57.66667 15.00000
res = t.test(b ~ a, paired = T, alternative = "two.sided")
data = PlantGrowth
data
## weight group
## 1 4.17 ctrl
## 2 5.58 ctrl
## 3 5.18 ctrl
## 4 6.11 ctrl
## 5 4.50 ctrl
## 6 4.61 ctrl
## 7 5.17 ctrl
## 8 4.53 ctrl
## 9 5.33 ctrl
## 10 5.14 ctrl
## 11 4.81 trt1
## 12 4.17 trt1
## 13 4.41 trt1
## 14 3.59 trt1
## 15 5.87 trt1
## 16 3.83 trt1
## 17 6.03 trt1
## 18 4.89 trt1
## 19 4.32 trt1
## 20 4.69 trt1
## 21 6.31 trt2
## 22 5.12 trt2
## 23 5.54 trt2
## 24 5.50 trt2
## 25 5.37 trt2
## 26 5.29 trt2
## 27 4.92 trt2
## 28 6.15 trt2
## 29 5.80 trt2
## 30 5.26 trt2
# Show the levels
levels(data$group)
## [1] "ctrl" "trt1" "trt2"
#Order the data by levels
data$group = ordered(data$group, levels = c("ctrl", "trt1", "trt2"))
#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#calculate group statistics
group_by(data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 3 x 4
## group count mean sd
## <ord> <int> <dbl> <dbl>
## 1 ctrl 10 5.03 0.583
## 2 trt1 10 4.66 0.794
## 3 trt2 10 5.53 0.443
res.aov = aov(weight ~ group, data = data)
res.aov
## Call:
## aov(formula = weight ~ group, data = data)
##
## Terms:
## group Residuals
## Sum of Squares 3.76634 10.49209
## Deg. of Freedom 2 27
##
## Residual standard error: 0.6233746
## Estimated effects are balanced
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 3.766 1.8832 4.846 0.0159 *
## Residuals 27 10.492 0.3886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The rest of the values in the output table describe the independent variable and the residuals:
The Df column displays the degrees of freedom for the independent variable (the number of levels in the variable minus 1), and the degrees of freedom for the residuals (the total number of observations minus one and minus the number of levels in the independent variables).
The Sum Sq column displays the sum of squares (a.k.a. the total variation between the group means and the overall mean).
The Mean Sq column is the mean of the sum of squares, calculated by dividing the sum of squares by the degrees of freedom for each parameter.
The F-value column is the test statistic from the F test. This is the mean square of each independent variable divided by the mean square of the residuals. The larger the F value, the more likely it is that the variation caused by the independent variable is real and not due to chance.
The Pr(>F) column is the p-value of the F-statistic. This shows how likely it is that the F-value calculated from the test would have occurred if the null hypothesis of no difference among group means were true.
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ group, data = data)
##
## $group
## diff lwr upr p adj
## trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
## trt2-ctrl 0.494 -0.1972161 1.1852161 0.1979960
## trt2-trt1 0.865 0.1737839 1.5562161 0.0120064
#Correlation in R
#install.packages("corrplot")
library(corrplot)
## corrplot 0.89 loaded
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
M = cor(mtcars)
head(M)
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.6811719 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.6999381 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.7102139 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.4487591 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.0000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.7124406 1.0000000
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.5998324 0.4802848 -0.5509251
## cyl -0.59124207 -0.8108118 -0.5226070 -0.4926866 0.5269883
## disp -0.43369788 -0.7104159 -0.5912270 -0.5555692 0.3949769
## hp -0.70822339 -0.7230967 -0.2432043 -0.1257043 0.7498125
## drat 0.09120476 0.4402785 0.7127111 0.6996101 -0.0907898
## wt -0.17471588 -0.5549157 -0.6924953 -0.5832870 0.4276059
corrplot(M, method="circle")
corrplot(M, method="color")
corrplot(M, method="number")
corrplot(M, type="upper")
#Function for calculation pvalue for each correlation
cor.mtest <- function(mat, ...) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat<- matrix(NA, n, n)
diag(p.mat) <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], ...)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
}
}
colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
p.mat
}
# matrix of the p-value of the correlation
p.mat = cor.mtest(mtcars)
corrplot(M, type="upper", p.mat = p.mat, sig.level = 0.01)
corrplot(M, type="upper", p.mat = p.mat, sig.level = 0.01, insig = "blank")