user — Mar 2, 2014, 6:06 PM
library(lattice)
library(plyr)
library(xtable)
prDat <- read.table("GSE4051_data.tsv",header=T)
prDes <- readRDS("GSE4051_design.rds")
str(prDat, max.level = 0)
'data.frame': 29949 obs. of 39 variables:
set.seed(540)
KeepGenes <- sample(1:nrow(prDat), 100)
KeepGenes <- rownames(prDat)[KeepGenes]
miniDat <- subset(prDat, rownames(prDat) %in% KeepGenes)
miniDat <- data.frame(gExp = as.vector(t(as.matrix(miniDat))),
gene = factor(rep(rownames(miniDat), each = ncol(miniDat)),
levels = rownames(miniDat)))
miniDat <- suppressWarnings(data.frame(prDes, miniDat))
str(miniDat)
'data.frame': 3900 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 6.84 6.33 6.52 6.56 6.81 ...
$ gene : Factor w/ 100 levels "1416357_a_at",..: 1 1 1 1 1 1 1 1 1 1 ...
pvalue <- ddply(miniDat, ~ gene, function(z) {
t <- suppressWarnings(t.test(gExp ~ gType, z))
wc <- suppressWarnings(wilcox.test(gExp ~ gType, z))
ks <- suppressWarnings(ks.test(z$gExp[z$gType == "NrlKO"],
z$gExp[z$gType == "wt"]))
c(pvalue_t=t$p.value,pvalue_ws=wc$p.value,pvalue_ks=ks$p.value)
})
#head(pvalue)
pvalue
gene pvalue_t pvalue_ws pvalue_ks
1 1416357_a_at 8.196e-01 8.993e-01 8.306e-01
2 1416962_at 8.987e-01 6.837e-01 6.591e-01
3 1417183_at 6.507e-02 7.417e-02 2.815e-01
4 1417258_at 3.092e-01 2.467e-01 2.815e-01
5 1417558_at 8.252e-02 9.179e-02 1.040e-01
6 1417923_at 4.801e-03 3.535e-03 1.143e-02
7 1418076_at 8.617e-01 1.000e+00 7.546e-01
8 1418200_at 1.937e-01 2.957e-01 4.168e-01
9 1418272_at 8.850e-01 6.031e-01 4.107e-01
10 1418943_at 9.692e-01 9.440e-01 9.868e-01
11 1419159_at 1.320e-01 2.957e-01 1.037e-01
12 1419588_at 4.926e-02 4.363e-02 8.289e-03
13 1419805_s_at 7.125e-01 7.360e-01 9.516e-01
14 1421541_a_at 6.655e-02 1.002e-01 5.544e-02
15 1421768_a_at 2.159e-01 2.731e-01 4.225e-01
16 1422057_at 2.798e-01 4.397e-01 8.306e-01
17 1422503_s_at 4.623e-01 1.062e-01 1.040e-01
18 1422830_s_at 2.198e-01 8.134e-01 2.424e-01
19 1423132_a_at 9.129e-02 2.669e-01 1.903e-01
20 1424095_at 5.904e-01 4.912e-01 6.038e-01
21 1424926_at 6.421e-01 5.133e-01 7.757e-02
22 1425108_a_at 5.185e-01 5.741e-01 9.914e-01
23 1425293_a_at 4.540e-01 6.328e-01 8.537e-01
24 1425341_at 4.505e-01 6.734e-01 4.713e-01
25 1425870_a_at 2.831e-01 4.151e-01 6.452e-01
26 1426003_at 9.926e-01 9.888e-01 9.050e-01
27 1426086_a_at 4.504e-01 4.646e-01 7.005e-01
28 1426301_at 6.822e-01 6.465e-01 3.278e-01
29 1426736_at 6.429e-01 9.889e-01 7.311e-01
30 1428476_a_at 8.968e-01 1.000e+00 8.955e-01
31 1429670_a_at 7.934e-01 6.429e-01 7.142e-01
32 1429696_at 8.164e-01 7.705e-01 9.664e-01
33 1429834_a_at 6.540e-01 5.180e-01 7.546e-01
34 1430029_a_at 4.693e-01 3.992e-01 5.097e-01
35 1430173_x_at 9.957e-01 9.217e-01 9.992e-01
36 1430358_at 3.776e-01 4.440e-01 2.815e-01
37 1430577_at 1.614e-01 1.866e-01 2.265e-01
38 1432127_at 5.168e-01 6.329e-01 6.175e-01
39 1432182_at 1.035e-01 4.302e-02 5.305e-02
40 1432925_at 2.401e-01 3.914e-01 7.413e-01
41 1433117_at 5.313e-01 4.955e-01 4.990e-01
42 1433946_at 2.264e-01 3.363e-01 1.523e-01
43 1433979_at 4.258e-01 7.705e-01 5.171e-01
44 1434325_x_at 9.391e-01 8.441e-01 7.678e-01
45 1435030_at 3.965e-03 7.894e-03 5.074e-02
46 1435208_at 8.065e-01 6.530e-01 8.537e-01
47 1435875_at 4.074e-01 8.331e-01 6.038e-01
48 1436746_at 2.054e-01 1.600e-01 3.550e-01
49 1437715_x_at 9.558e-01 8.786e-01 4.990e-01
50 1437982_x_at 4.202e-01 2.379e-01 3.992e-01
51 1438107_x_at 1.664e-02 2.548e-02 1.126e-01
52 1438429_at 8.133e-01 8.567e-01 7.487e-01
53 1438702_at 7.500e-01 4.480e-01 3.767e-01
54 1439928_at 2.833e-01 3.088e-01 5.171e-01
55 1440749_at 2.820e-02 5.974e-02 1.218e-01
56 1440778_x_at 9.839e-01 9.226e-01 9.713e-01
57 1441078_at 2.931e-01 3.117e-01 6.867e-01
58 1441814_s_at 6.056e-01 7.494e-01 6.589e-01
59 1442457_at 3.002e-01 3.837e-01 8.647e-01
60 1442829_at 3.590e-01 2.011e-01 9.600e-02
61 1443092_at 4.005e-01 5.315e-01 9.112e-01
62 1443255_s_at 2.704e-01 2.610e-01 3.992e-01
63 1443282_at 6.645e-01 6.870e-01 8.674e-01
64 1444196_at 8.048e-01 9.447e-01 5.894e-01
65 1444617_at 1.142e-03 1.116e-03 6.481e-04
66 1446136_at 4.652e-01 5.645e-01 8.306e-01
67 1446571_at 1.012e-01 3.153e-02 4.640e-02
68 1446589_at 5.378e-01 5.551e-01 3.657e-01
69 1446779_at 8.524e-01 6.940e-01 9.305e-01
70 1446961_at 1.476e-01 2.354e-01 5.713e-01
71 1447521_x_at 1.026e-01 3.385e-02 4.237e-02
72 1447640_s_at 1.511e-01 4.024e-02 2.289e-02
73 1448090_at 2.505e-02 6.117e-02 1.319e-01
74 1448624_at 7.754e-01 7.895e-01 9.305e-01
75 1448647_at 2.127e-01 2.793e-01 4.225e-01
76 1449127_at 8.108e-02 4.602e-02 2.289e-02
77 1449162_at 1.742e-01 2.111e-01 3.878e-01
78 1450156_a_at 6.856e-01 9.447e-01 4.628e-01
79 1450169_at 5.092e-01 3.803e-01 2.074e-01
80 1450552_at 3.414e-01 2.985e-01 2.114e-01
81 1450714_at 9.842e-01 6.267e-01 3.745e-01
82 1451108_at 6.570e-01 6.530e-01 6.175e-01
83 1451118_a_at 8.036e-01 8.994e-01 6.729e-01
84 1451363_a_at 9.642e-02 8.397e-02 4.853e-02
85 1452944_at 9.388e-01 9.105e-01 6.175e-01
86 1453147_at 2.950e-02 5.252e-02 1.972e-01
87 1453720_at 1.866e-01 1.841e-01 4.273e-01
88 1454237_at 5.311e-01 6.130e-01 7.678e-01
89 1454632_at 7.676e-01 7.284e-01 9.194e-01
90 1454669_at 8.515e-01 9.440e-01 9.868e-01
91 1455861_at 7.733e-01 8.772e-01 8.754e-01
92 1456736_x_at 4.621e-02 7.414e-02 3.767e-01
93 1457254_x_at 9.494e-01 9.664e-01 7.809e-01
94 1457524_at 1.191e-07 4.636e-06 4.367e-05
95 1458178_at 7.426e-01 9.217e-01 9.988e-01
96 1458637_x_at 2.553e-02 4.068e-02 1.235e-01
97 1459118_at 9.896e-01 9.105e-01 9.893e-01
98 1459539_at 4.099e-02 4.068e-02 3.354e-02
99 1459967_at 5.571e-01 7.254e-01 9.577e-01
100 1460638_at 8.675e-01 7.149e-01 9.139e-01
## get all columns except names
p_value <- pvalue[,-1]
##change to binary
binary <- p_value <= 0.05
binary <- as.data.frame(binary)
rownames(binary) <- pvalue[,1]
(sig_by_test <- apply(binary, 2, sum)) # 2 means column sum
pvalue_t pvalue_ws pvalue_ks
12 13 10
## Choose interesting Genes (sig in all tests)
interest <- rownames(subset(binary, apply(binary,1,sum)==3))
miniDat <- subset(prDat, rownames(prDat) %in% interest)
miniDat <- data.frame(gExp = as.vector(t(as.matrix(miniDat))),
gene = factor(rep(rownames(miniDat), each = ncol(miniDat)),
levels = interest))
miniDat <- suppressWarnings(data.frame(prDes, miniDat))
str(miniDat)
'data.frame': 195 obs. of 6 variables:
$ sidChar : chr "Sample_20" "Sample_21" "Sample_22" "Sample_23" ...
$ sidNum : num 20 21 22 23 16 17 6 24 25 26 ...
$ devStage: Factor w/ 5 levels "E16","P2","P6",..: 1 1 1 1 1 1 1 2 2 2 ...
$ gType : Factor w/ 2 levels "wt","NrlKO": 1 1 1 1 2 2 2 1 1 1 ...
$ gExp : num 8.73 9.17 8.87 8.76 6.66 ...
$ gene : Factor w/ 5 levels "1417923_at","1419588_at",..: 1 1 1 1 1 1 1 1 1 1 ...
stripplot(gType ~ gExp | gene, miniDat,
scales = list(x = list(relation = "free")),
group = gType, auto.key = TRUE)