[2014 Epiworkshop] R basics and Sequencing data analysis

Part II: R plotting

Lecturer: Sheng Li

Weill Cornell Medical College

1. Random data generation

set.seed(1)
t = rnorm(100, 100)
t1 = t + rnorm(100, 0, 0.5)
t2 = t + rnorm(100, 0, 0.5)
n = rnorm(100, 99)
n1 = n + rnorm(100, 0, 0.2)
n2 = n + rnorm(100, 0, 0.2)
y = data.frame(cbind(n1, n2, t1, t2))
head(y)

##       n1     n2     t1     t2
## 1 100.11  99.91  99.06  99.58
## 2  98.33  97.89 100.20 101.03
## 3 100.85 100.73  98.71  99.96
## 4  98.54  98.62 101.67 101.43
## 5 100.57 100.85 100.00  99.19
## 6 100.44 100.83 100.06 100.43

2. Save R object

save(y, file = "y.rda")

3. Load R object

load("y.rda")

4. Write to text file

write.table(y, file = "y.txt")
write.table(y, file = "y.txt", quote = F)
write.table(y, file = "y.txt", quote = F, sep = "\t")
write.table(y, file = "y.txt", quote = F, sep = "\t", row.names = F)

5. Read from text file

X = read.table("y.txt", header = T, sep = "\t")

6. Plotting

Histogram

hist(y$n1)

plot of chunk unnamed-chunk-6

hist(y$n1, breaks = 5)

plot of chunk unnamed-chunk-6

hist(y$n1, main = "normal", xlab = "n1")

plot of chunk unnamed-chunk-6

density plot

plot(density(y$n1))

plot of chunk unnamed-chunk-7

scatter plot

plot(y$n1, y$n2)

plot of chunk unnamed-chunk-8

correlation analysis

cor(y$n1, y$n2, method = "pearson")

## [1] 0.9512

cor.test(y$t1, y$t2, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  y$t1 and y$t2
## t = 11.4, df = 98, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6560 0.8286
## sample estimates:
##    cor 
## 0.7551

cor.test(y$t1, y$n1, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  y$t1 and y$n1
## t = -0.253, df = 98, p-value = 0.8008
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2209  0.1717
## sample estimates:
##      cor 
## -0.02555

cor(y)

##          n1       n2       t1       t2
## n1  1.00000  0.95117 -0.02555  0.04334
## n2  0.95117  1.00000 -0.09278 -0.01098
## t1 -0.02555 -0.09278  1.00000  0.75510
## t2  0.04334 -0.01098  0.75510  1.00000

multiple plots

par(mfrow = c(2, 2))
plot(y$n1, y$n2)
plot(y$t1, y$t2)
plot(y$n1, y$t1)
plot(y$n1, y$t2)

plot of chunk unnamed-chunk-10

par(mfrow = c(1, 1))

heatmap

library(pheatmap)
pheatmap(y)

plot of chunk unnamed-chunk-11

heatmap of correlation

library(pheatmap)
pheatmap(cor(y), display_numbers = T)

plot of chunk unnamed-chunk-12

Calculation by rows or columns

colMeans(y)

##     n1     n2     t1     t2 
##  99.04  99.04 100.09 100.12

rowMeans(y)

##   [1]  99.66  99.36 100.06 100.07 100.15 100.44  99.87 100.31  99.45  99.69
##  [11] 100.53  99.73  99.10  97.78 100.10 100.19  99.20  99.73  99.32 100.16
##  [21] 100.37 100.79  99.79  97.43  99.92 100.01  99.11  98.66  98.77 100.01
##  [31]  99.74  98.31  99.42 100.01  98.79  99.41  99.87  99.32  99.93  99.63
##  [41]  99.59  99.22  99.82  99.51  98.11  97.94 100.23  99.76  99.57 100.71
##  [51] 100.58  99.54  99.72  98.59 100.91 100.76  98.46  98.83  98.62  99.52
##  [61]  99.49 100.17  99.72  99.52  99.15 100.28  98.80 100.21  99.27  99.91
##  [71]  99.71  99.31  99.38  99.32  98.38  99.76  99.20  99.89  99.38 100.29
##  [81]  99.41 100.01  99.87  98.62 100.51  99.66  99.05  99.51  98.65  99.23
##  [91]  99.75 100.62 100.59  99.88 100.39  99.19  99.91  99.15  98.64  99.53

n.rowm = rowMeans(y[, c("n1", "n2")])
t.rowm = rowMeans(y[, c("t1", "t2")])
abs.diff = abs(n.rowm - t.rowm)
abs.diff[1:10]

##  [1] 0.6880 2.5038 1.4594 2.9737 1.1171 0.3883 1.9243 1.5885 2.4329 1.1009

boxplot

idx = order(abs.diff, decreasing = T)[1:10]
e1 = data.frame(group = "n1", value = y[idx, "n1"])
e2 = data.frame(group = "t1", value = y[idx, "t1"])
boxplot(value ~ group, rbind(e1, e2))

plot of chunk unnamed-chunk-14

print plot to pdf file

pdf("boxplot.pdf", width = 5, height = 8)
boxplot(value ~ group, rbind(e1, e2))
dev.off()

## pdf 
##   2