library(TeachingDemos)
library(ALL)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
data(ALL)
library(multtest)
data(golub)
golub.gnames[1042,]
## [1] "2354" "CCND3 Cyclin D3" "M92287_at"
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
mall <- apply(golub[,gol.fac=="ALL"], 1, mean)
maml <- apply(golub[,gol.fac=="AML"], 1, mean)
o <- order(abs(mall-maml), decreasing=TRUE)
print(golub.gnames[o[1:5],2])
## [1] "CST3 Cystatin C (amyloid angiopathy and cerebral hemorrhage)"
## [2] "INTERLEUKIN-8 PRECURSOR"
## [3] "Interleukin 8 (IL8) gene"
## [4] "DF D component of complement (adipsin)"
## [5] "MPO Myeloperoxidase"
library(ape)
table(read.GenBank(c("X94991.1"),as.character=TRUE))
##
## a c g t
## 410 789 573 394
data(golub, package = "multtest")
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
stripchart(golub[1042,] ~ gol.fac, method="jitter")

hist(golub[1042, gol.fac=="ALL"])

x <- sort(golub[1042, gol.fac=="ALL"], decreasing = FALSE)
x[1:5]
## [1] 0.45827 1.10546 1.27645 1.32551 1.36844
boxplot(golub[1042,] ~ gol.fac)

pvec <- seq(0,1,0.25)
quantile(golub[1042, gol.fac=="ALL"],pvec)
## 0% 25% 50% 75% 100%
## 0.458270 1.796065 1.927760 2.178705 2.766100
qqnorm(golub[1042, gol.fac=="ALL"])
qqline(golub[1042, gol.fac=="ALL"])

library(multtest); data(golub)
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
x <- golub[1042,gol.fac=="ALL"]
z <- (x-1.90)/0.50
sum(z^2)
## [1] 25.03312
pchisq(sum(z^2),27, lower.tail=FALSE)
## [1] 0.5726059
n <- 11
x <- golub[2058, gol.fac=="AML"]
t.value <- sqrt(n)*(mean(x)-0)/sd(x)
t.value
## [1] 1.236324
var(golub[1042,gol.fac=="ALL"])/var(golub[1042,gol.fac=="AML"])
## [1] 0.7116441
f<-function(x){dnorm(x,1.9,0.5)}
plot(f,0,4,xlab="x-axis",ylab="density f(x)")
plot(f,0,4,xlab="x-axis",ylab="density f(x)")
x<-seq(0,1.4,0.01)
polygon(c(0,x,1.4), c(0,f(x),0), col="lightblue")

data(golub, package = "multtest")
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
sigma <- 0.25; n <- 27; mu0 <- 0
x <- golub[2058,gol.fac=="ALL"]
z.value <- sqrt(n)*(mean(x) - mu0)/sigma
2*pnorm(-abs(z.value),0,1)
## [1] 0.9991094
mean(x)+qnorm(c(0.025),0,1)*sigma/sqrt(n)
## [1] -0.09424511
mean(x)+qnorm(c(0.975),0,1)*sigma/sqrt(n)
## [1] 0.09435251
library(TeachingDemos)
z.test(x,mu=0,sd=0.25)
##
## One Sample z-test
##
## data: x
## z = 0.0011162, n = 27.000000, Std. Dev. = 0.250000, Std. Dev. of
## the sample mean = 0.048113, p-value = 0.9991
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.09424511 0.09435251
## sample estimates:
## mean of x
## 5.37037e-05
ci.examp(mean.sim =0, sd = 1, n = 25, reps = 100, method = "z", lower.conf=0.025, upper.conf=0.975)

x <- golub[2058,gol.fac=="ALL"]; mu0 <- 0; n <- 27
t.value<-sqrt(n)*(mean(x) - mu0)/sd(x)
t.value
## [1] 0.001076867
mean(x)+qt(0.025,26)*sd(x)/sqrt(n)
## [1] -0.1024562
t.test(x,mu=0)
##
## One Sample t-test
##
## data: x
## t = 0.0010769, df = 26, p-value = 0.9991
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1024562 0.1025636
## sample estimates:
## mean of x
## 5.37037e-05
t.test(golub[1042,gol.fac=="ALL"],mu=0, alternative = c("greater"))
##
## One Sample t-test
##
## data: golub[1042, gol.fac == "ALL"]
## t = 20.06, df = 26, p-value < 2.2e-16
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
## 1.732853 Inf
## sample estimates:
## mean of x
## 1.893883
t.test(golub[1042,] ~ gol.fac, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: golub[1042, ] by gol.fac
## t = 6.3186, df = 16.118, p-value = 9.871e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.8363826 1.6802008
## sample estimates:
## mean in group ALL mean in group AML
## 1.8938826 0.6355909
t.test(golub[1042,] ~ gol.fac, var.equal = TRUE)
##
## Two Sample t-test
##
## data: golub[1042, ] by gol.fac
## t = 6.7983, df = 36, p-value = 6.046e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.8829143 1.6336690
## sample estimates:
## mean in group ALL mean in group AML
## 1.8938826 0.6355909
var.test(golub[1042,] ~ gol.fac)
##
## F test to compare two variances
##
## data: golub[1042, ] by gol.fac
## F = 0.71164, num df = 26, denom df = 10, p-value = 0.4652
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2127735 1.8428387
## sample estimates:
## ratio of variances
## 0.7116441
binom.test(18, 22, p = 0.7, alternative = c("greater"), conf.level = 0.95)
##
## Exact binomial test
##
## data: 18 and 22
## number of successes = 18, number of trials = 22, p-value = 0.1645
## alternative hypothesis: true probability of success is greater than 0.7
## 95 percent confidence interval:
## 0.6309089 1.0000000
## sample estimates:
## probability of success
## 0.8181818
library(ape)
zyxinfreq <- table(read.GenBank(c("X94991.1"),as.character=TRUE))
chisq.test(zyxinfreq)
##
## Chi-squared test for given probabilities
##
## data: zyxinfreq
## X-squared = 187.07, df = 3, p-value < 2.2e-16
shapiro.test(golub[1042, gol.fac=="ALL"])
##
## Shapiro-Wilk normality test
##
## data: golub[1042, gol.fac == "ALL"]
## W = 0.94663, p-value = 0.1774
library(nortest)
ad.test(golub[1042,gol.fac=="ALL"])
##
## Anderson-Darling normality test
##
## data: golub[1042, gol.fac == "ALL"]
## A = 0.52154, p-value = 0.1683
library(outliers)
grubbs.test(golub[1042, gol.fac=="ALL"])
##
## Grubbs test for one outlier
##
## data: golub[1042, gol.fac == "ALL"]
## G = 2.92640, U = 0.65796, p-value = 0.0183
## alternative hypothesis: lowest value 0.45827 is an outlier
data(golub,package="multtest")
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
sh <- apply(golub[,gol.fac=="ALL"], 1, function(x) shapiro.test(x)$p.value)
sum(sh > 0.05)/nrow(golub) * 100
## [1] 58.27598
data(golub, package = "multtest");
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
pt <- apply(golub, 1, function(x) t.test(x ~ gol.fac)$p.value)
pw <- apply(golub, 1, function(x) wilcox.test(x ~ gol.fac)$p.value)
## Warning in wilcox.test.default(x = c(1.20689, 0.51105, 1.20264, 1.72492, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.24444, 0.64282, 0.46461, 0.68311, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.2103, 0.57747, 1.02711, -0.0496, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(-0.1859, -0.38107, -0.47897,
## -0.42782, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(-0.14056, -0.49849, -1.04357,
## -0.64631, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.40334, 0.31732, 0.32201, 0.30622, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.4406, 0.34383, 0.1861, 0.79001,
## 0.06251, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.9125, 0.90637, 1.15064, 0.92193, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.37595, 0.32497, 0.34482, 0.33303, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(2.53193, 2.48503, 2.30672, 2.76934, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00447, 0.77371, -0.32637, 0.53205, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(1.60338, 1.04229, 0.6379, 1.25547, :
## cannot compute exact p-value with ties
resul <- data.frame(cbind(pw,pt))
resul[pw<0.05 & abs(pt-pw)>0.2,]
## pw pt
## 456 0.04480288 0.2636088
## 1509 0.03215830 0.4427477
library(ALL);data(ALL)
ALLB123 <- ALL[,ALL$BT %in% c("B1","B2","B3")]
y <- exprs(ALLB123)["1866_g_at",]
summary(lm(y ~ ALLB123$BT))
##
## Call:
## lm(formula = y ~ ALLB123$BT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59070 -0.25251 -0.07008 0.16395 1.29635
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.58222 0.08506 53.873 < 2e-16 ***
## ALLB123$BTB2 -0.43689 0.10513 -4.156 8.52e-05 ***
## ALLB123$BTB3 -0.72193 0.11494 -6.281 2.00e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3707 on 75 degrees of freedom
## Multiple R-squared: 0.3461, Adjusted R-squared: 0.3287
## F-statistic: 19.85 on 2 and 75 DF, p-value: 1.207e-07
library(ALL); data(ALL)
ALLB123 <- ALL[,ALL$BT %in% c("B1","B2","B3")]
y <- exprs(ALLB123)["1242_at",]
summary(lm(y ~ ALLB123$BT))
##
## Call:
## lm(formula = y ~ ALLB123$BT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48408 -0.13461 -0.01297 0.12232 0.67138
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.55083 0.05673 115.483 <2e-16 ***
## ALLB123$BTB2 0.03331 0.07011 0.475 0.636
## ALLB123$BTB3 -0.04675 0.07665 -0.610 0.544
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2473 on 75 degrees of freedom
## Multiple R-squared: 0.01925, Adjusted R-squared: -0.006898
## F-statistic: 0.7362 on 2 and 75 DF, p-value: 0.4823
library("ALL"); data(ALL)
ALLBm <- ALL[,which(ALL$BT %in% c("B","B1","B2","B3","B4") & ALL$mol.biol %in% c("BCR/ABL","NEG"))]
facmolb <- factor(ALLBm$mol.biol)
facB <- factor(ceiling(as.integer(ALLBm$BT)/3),levels=1:2,labels=c("B012","B34"))
anova(lm(exprs(ALLBm)["32069_at",] ~ facB * facmolb))
## Analysis of Variance Table
##
## Response: exprs(ALLBm)["32069_at", ]
## Df Sum Sq Mean Sq F value Pr(>F)
## facB 1 1.1659 1.1659 4.5999 0.0352127 *
## facmolb 1 3.2162 3.2162 12.6891 0.0006433 ***
## facB:facmolb 1 1.1809 1.1809 4.6592 0.0340869 *
## Residuals 75 19.0094 0.2535
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(exprs(ALLBm)["32069_at",] ~ facB * facmolb))
##
## Call:
## lm(formula = exprs(ALLBm)["32069_at", ] ~ facB * facmolb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.21905 -0.31589 -0.05137 0.24404 1.52015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.7649 0.1073 63.026 < 2e-16 ***
## facBB34 -0.5231 0.1686 -3.103 0.0027 **
## facmolbNEG -0.6020 0.1458 -4.128 9.39e-05 ***
## facBB34:facmolbNEG 0.5016 0.2324 2.159 0.0341 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5034 on 75 degrees of freedom
## Multiple R-squared: 0.2264, Adjusted R-squared: 0.1954
## F-statistic: 7.316 on 3 and 75 DF, p-value: 0.0002285
data(ALL,package="ALL");library(ALL)
ALLB123 <- ALL[,ALL$BT %in% c("B1","B2","B3")]
y <- exprs(ALLB123)["1866_g_at",]
shapiro.test(residuals(lm(y ~ ALLB123$BT)))
##
## Shapiro-Wilk normality test
##
## data: residuals(lm(y ~ ALLB123$BT))
## W = 0.93462, p-value = 0.0005989
library(ALL); data(ALL); library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ALLB123 <- ALL[,ALL$BT %in% c("B1","B2","B3")]
y <- exprs(ALLB123)["1866_g_at",]
bptest(lm(y ~ ALLB123$BT),studentize = FALSE)
##
## Breusch-Pagan test
##
## data: lm(y ~ ALLB123$BT)
## BP = 8.7311, df = 2, p-value = 0.01271
data(ALL,package="ALL");library(ALL)
ALLB123 <- ALL[,ALL$BT %in% c("B1","B2","B3")]
y <- exprs(ALLB123)["1866_g_at",]
oneway.test(y ~ ALLB123$BT)
##
## One-way analysis of means (not assuming equal variances)
##
## data: y and ALLB123$BT
## F = 14.157, num df = 2.000, denom df = 36.998, p-value = 2.717e-05
data(ALL,package="ALL");library(ALL)
ALLB123 <- ALL[,ALL$BT %in% c("B1","B2","B3")]
y <- exprs(ALLB123)["1866_g_at",]
kruskal.test(y ~ ALLB123$BT)
##
## Kruskal-Wallis rank sum test
##
## data: y by ALLB123$BT
## Kruskal-Wallis chi-squared = 30.667, df = 2, p-value = 2.192e-07
data(ALL, package = "ALL")
slotNames(ALL)
## [1] "experimentData" "assayData" "phenoData"
## [4] "featureData" "annotation" "protocolData"
## [7] ".__classVersion__"
row.names(exprs(ALL))[1:10]
## [1] "1000_at" "1001_at" "1002_f_at" "1003_s_at" "1004_at"
## [6] "1005_at" "1006_at" "1007_s_at" "1008_f_at" "1009_at"
ALL1pp <- ALL1 <- ALL[,ALL$mol == "ALL1/AF4"]
mads <- apply(exprs(ALL1), 2, mad)
meds <- apply(exprs(ALL1), 2, median)
dat <- sweep(exprs(ALL1), 2, meds)
exprs(ALL1pp) <- sweep(dat, 2, mads, FUN="/")
cvval <- apply(exprs(ALL1pp),1,function(x){sd(x)/abs(mean(x))})
library("genefilter")
f1 <- function(x)(IQR(x)>0.5)
f2 <- pOverA(.25, log2(100))
f3 <- function(x) (median(2^x) > 300)
f4 <- function(x) (shapiro.test(x)$p.value > 0.05)
f5 <- function(x) (sd(x)/abs(mean(x))<0.1)
f6 <- function(x) (sqrt(10)* abs(mean(x))/sd(x) > qt(0.975,9))
ff <- filterfun(f1,f2,f3,f4,f5,f6)
library("ALL"); data(ALL)
selected <- genefilter(exprs(ALL[,ALL$BT=="B"]), ff)
sum(selected)
## [1] 317
library("genefilter");library("ALL"); data(ALL)
patientB <- factor(ALL$BT %in% c("B","B1","B2","B3","B4"))
f1 <- function(x) (shapiro.test(x)$p.value > 0.05)
f2 <- function(x) (t.test(x ~ patientB)$p.value < 0.05)
sel1 <- genefilter(exprs(ALL[,patientB==TRUE]), filterfun(f1))
sel2 <- genefilter(exprs(ALL[,patientB==FALSE]), filterfun(f1))
sel3 <- genefilter(exprs(ALL), filterfun(f2))
selected <- sel1 & sel2 & sel3
ALLs <- ALL[selected,]
library(limma)
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
x <- matrix(as.integer(c(sel1,sel2,sel3)),ncol = 3,byrow=FALSE)
colnames(x) <- c("sel1","sel2","sel3")
vc <- vennCounts(x, include="both")
vennDiagram(vc)

library("ALL"); library("limma");
data(ALL, package = "ALL")
allB <- ALL[,which(ALL$BT %in% c("B","B1","B2"))]
design.ma <- model.matrix(~ 0 + factor(allB$BT))
colnames(design.ma) <- c("B","B1","B2")
fit <- lmFit(allB, design.ma)
fit <- eBayes(fit)
toptab <- topTable(fit, coef=2,5,adjust.method="fdr")
print(toptab[,1:5],digits=4)
## logFC AveExpr t P.Value adj.P.Val
## AFFX-hum_alu_at 13.42 13.50 326.0 3.165e-99 3.996e-95
## 32466_at 12.68 12.70 306.3 1.333e-97 8.413e-94
## 32748_at 12.08 12.11 296.3 9.771e-97 3.616e-93
## 35278_at 12.44 12.45 295.5 1.146e-96 3.616e-93
## 34593_g_at 12.64 12.58 278.0 4.431e-95 1.119e-91
cont.ma <- makeContrasts(B-B1,B1-B2, levels=factor(allB$BT))
cont.ma
## Contrasts
## Levels B - B1 B1 - B2
## B 1 0
## B1 -1 1
## B2 0 -1
fit1 <- contrasts.fit(fit, cont.ma)
fit1 <- eBayes(fit1)
toptabcon <- topTable(fit, coef=2,5,adjust.method="fdr")
print(toptabcon[,1:5],digits=4)
## logFC AveExpr t P.Value adj.P.Val
## AFFX-hum_alu_at 13.42 13.50 326.0 3.165e-99 3.996e-95
## 32466_at 12.68 12.70 306.3 1.333e-97 8.413e-94
## 32748_at 12.08 12.11 296.3 9.771e-97 3.616e-93
## 35278_at 12.44 12.45 295.5 1.146e-96 3.616e-93
## 34593_g_at 12.64 12.58 278.0 4.431e-95 1.119e-91
toptabcon <- topTable(fit1, coef=2,5,adjust.method="fdr")
print(toptabcon[,1:5],digits=4)
## logFC AveExpr t P.Value adj.P.Val
## 33358_at 1.4890 5.260 7.374 5.737e-10 7.242e-06
## 1389_at -1.7852 9.262 -7.081 1.816e-09 9.744e-06
## 1914_at 2.0976 4.939 7.019 2.315e-09 9.744e-06
## 36873_at 1.8646 4.303 6.426 2.361e-08 7.452e-05
## 37471_at 0.8701 6.551 6.106 8.161e-08 2.061e-04
library(multtest); data(golub)
index <- grep("Cyclin",golub.gnames[,2])
golub.gnames[index,2]
## [1] "CCND2 Cyclin D2"
## [2] "CDK2 Cyclin-dependent kinase 2"
## [3] "CCND3 Cyclin D3"
## [4] "CDKN1A Cyclin-dependent kinase inhibitor 1A (p21, Cip1)"
## [5] "CCNH Cyclin H"
## [6] "Cyclin-dependent kinase 4 (CDK4) gene"
## [7] "Cyclin G2 mRNA"
## [8] "Cyclin A1 mRNA"
## [9] "Cyclin-selective ubiquitin carrier protein mRNA"
## [10] "CDK6 Cyclin-dependent kinase 6"
## [11] "Cyclin G1 mRNA"
## [12] "CCNF Cyclin F"
dist.cyclin <- dist(golub[index,],method="euclidian")
diam <- as.matrix(dist.cyclin)
rownames(diam) <- colnames(diam) <- golub.gnames[index,3]
diam[1:5,1:5]
## D13639_at M68520_at M92287_at U09579_at U11791_at
## D13639_at 0.000000 8.821806 11.55349 10.056814 8.669112
## M68520_at 8.821806 0.000000 11.70156 5.931260 2.934802
## M92287_at 11.553494 11.701562 0.00000 11.991333 11.900558
## U09579_at 10.056814 5.931260 11.99133 0.000000 5.698232
## U11791_at 8.669112 2.934802 11.90056 5.698232 0.000000
library("genefilter"); library("ALL"); data(ALL)
closeto1389_at <- genefinder(ALL, "1389_at", 10, method = "euc")
closeto1389_at[[1]]$indices
## [1] 2653 1096 6634 9255 6639 11402 9849 2274 8518 10736
round(closeto1389_at[[1]]$dists,1)
## [1] 12.6 12.8 12.8 12.8 13.0 13.0 13.1 13.2 13.3 13.4
featureNames(ALL)[closeto1389_at[[1]]$indices]
## [1] "32629_f_at" "1988_at" "36571_at" "39168_at" "36576_at"
## [6] "41295_at" "39756_g_at" "32254_at" "38438_at" "40635_at"
names <- list(c("g1","g2","g3","g4","g5"),c("p1","p2"))
sl.clus.dat <- matrix(c(1,1,1,1.1,3,2,3,2.3,5,5),ncol = 2,
byrow = TRUE,dimnames = names)
plot(sl.clus.dat,type="n", xlim=c(0,6), ylim=c(0,6))
text(sl.clus.dat,labels=row.names(sl.clus.dat))

print(dist(sl.clus.dat,method="euclidian"),digits=3)
## g1 g2 g3 g4
## g2 0.10
## g3 2.24 2.19
## g4 2.39 2.33 0.30
## g5 5.66 5.59 3.61 3.36
sl.out<-hclust(dist(sl.clus.dat,method="euclidian"),method="single")
plot(sl.out)

sl.out<-hclust(dist(rnorm(20,0,1),method="euclidian"),method="single")
plot(sl.out)

x <- c(rnorm(10,0,0.1),rnorm(10,3,0.5),rnorm(10,10,1.0))
plot(hclust(dist(x,method="euclidian"),method="single"))

plot(sl.out)

data(golub, package="multtest")
clusdata <- data.frame(golub[1042,],golub[2124,])
colnames(clusdata)<-c("CCND3 Cyclin D3","Zyxin")
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
plot(clusdata, pch=as.numeric(gol.fac))
legend("topright",legend=c("ALL","AML"),pch=1:2)

plot(hclust(dist(clusdata,method="euclidian"),method="single"))

data <- rbind(matrix(rnorm(100,0,0.5), ncol = 2),
matrix(rnorm(100,2,0.5), ncol = 2))
cl <- kmeans(data, 2)
plot(data, col = cl$cluster)
points(cl$centers, col = 1:2, pch = 8, cex=2)

initial <- matrix(c(0,0,2,2), nrow = 2, ncol=2, byrow=TRUE)
cl<- kmeans(data, initial, nstart = 10)
n <- 100; nboot<-1000
boot.cl <- matrix(0,nrow=nboot,ncol = 4)
for (i in 1:nboot){
dat.star <- data[sample(1:n,replace=TRUE),]
cl <- kmeans(dat.star, initial, nstart = 10)
boot.cl[i,] <- c(cl$centers[1,],cl$centers[2,])
}
quantile(boot.cl[,1],c(0.025,0.975))
## 2.5% 97.5%
## -0.1727041 0.1178634
quantile(boot.cl[,2],c(0.025,0.975))
## 2.5% 97.5%
## -0.08834266 0.13606280
quantile(boot.cl[,3],c(0.025,0.975))
## 2.5% 97.5%
## 1.847427 2.138265
quantile(boot.cl[,4],c(0.025,0.975))
## 2.5% 97.5%
## 1.785780 2.030579
data <- data.frame(golub[1042,],golub[2124,])
colnames(data)<-c("CCND3 Cyclin D3","Zyxin")
cl <- kmeans(data, 2,nstart = 10)
cl
## K-means clustering with 2 clusters of sizes 11, 27
##
## Cluster means:
## CCND3 Cyclin D3 Zyxin
## 1 0.6355909 1.5866682
## 2 1.8938826 -0.2947926
##
## Clustering vector:
## [1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1
## [36] 1 1 1
##
## Within cluster sum of squares by cluster:
## [1] 4.733248 19.842225
## (between_SS / total_SS = 62.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
quantile(boot.cl[,1],c(0.025,0.975))
## 2.5% 97.5%
## -0.1727041 0.1178634
quantile(boot.cl[,2],c(0.025,0.975))
## 2.5% 97.5%
## -0.08834266 0.13606280
quantile(boot.cl[,3],c(0.025,0.975))
## 2.5% 97.5%
## 1.847427 2.138265
quantile(boot.cl[,4],c(0.025,0.975))
## 2.5% 97.5%
## 1.785780 2.030579
library(multtest); data(golub)
x <- golub[2289,]; y <- golub[2430,]
cor(x,y)
## [1] 0.6376217
cor.test(x,y)
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.9662, df = 36, p-value = 1.666e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3993383 0.7952115
## sample estimates:
## cor
## 0.6376217
nboot <- 1000; boot.cor <- matrix(0,nrow=nboot,ncol = 1)
data <- matrix(c(x,y),ncol=2,byrow=FALSE)
for (i in 1:nboot){
dat.star <- data[sample(1:nrow(data),replace=TRUE),]
boot.cor[i,] <- cor(dat.star)[2,1]}
mean(boot.cor)
## [1] 0.6432279
quantile(boot.cor[,1],c(0.025,0.975))
## 2.5% 97.5%
## 0.2389299 0.9248476
library(multtest); data(golub)
corgol<- apply(golub, 1, function(x) cor(x,golub.cl))
o <- order(corgol)
eigen(cor(golub))$values[1:5]
## [1] 25.4382629 2.0757158 1.2484411 1.0713373 0.7365232
data <- golub; p <- ncol(data); n <- nrow(data) ; nboot<-1000
eigenvalues <- array(dim=c(nboot,p))
for (i in 1:nboot){dat.star <- data[sample(1:n,replace=TRUE),]
eigenvalues[i,] <- eigen(cor(dat.star))$values}
for (j in 1:p) print(quantile(eigenvalues[,j],c(0.025,0.975)))
## 2.5% 97.5%
## 24.86513 26.00970
## 2.5% 97.5%
## 1.933884 2.245744
## 2.5% 97.5%
## 1.143758 1.385539
## 2.5% 97.5%
## 0.9959979 1.1453831
## 2.5% 97.5%
## 0.6867921 0.8053297
## 2.5% 97.5%
## 0.5261660 0.6061389
## 2.5% 97.5%
## 0.4865392 0.5585574
## 2.5% 97.5%
## 0.4419577 0.5171381
## 2.5% 97.5%
## 0.3910773 0.4500239
## 2.5% 97.5%
## 0.3684624 0.4211697
## 2.5% 97.5%
## 0.3084073 0.3540221
## 2.5% 97.5%
## 0.2882908 0.3295121
## 2.5% 97.5%
## 0.2717718 0.3081364
## 2.5% 97.5%
## 0.2528265 0.2881842
## 2.5% 97.5%
## 0.2354121 0.2660459
## 2.5% 97.5%
## 0.2243074 0.2520104
## 2.5% 97.5%
## 0.2143896 0.2417358
## 2.5% 97.5%
## 0.2055815 0.2312879
## 2.5% 97.5%
## 0.1937281 0.2183918
## 2.5% 97.5%
## 0.1833810 0.2075669
## 2.5% 97.5%
## 0.1755144 0.1994077
## 2.5% 97.5%
## 0.1685520 0.1901211
## 2.5% 97.5%
## 0.1626335 0.1827831
## 2.5% 97.5%
## 0.1564219 0.1749540
## 2.5% 97.5%
## 0.1502405 0.1692852
## 2.5% 97.5%
## 0.1455630 0.1638323
## 2.5% 97.5%
## 0.1407281 0.1576954
## 2.5% 97.5%
## 0.1364985 0.1526834
## 2.5% 97.5%
## 0.1326813 0.1481741
## 2.5% 97.5%
## 0.1280307 0.1429988
## 2.5% 97.5%
## 0.1233945 0.1385494
## 2.5% 97.5%
## 0.1192567 0.1342291
## 2.5% 97.5%
## 0.1156207 0.1299444
## 2.5% 97.5%
## 0.1106859 0.1253995
## 2.5% 97.5%
## 0.1046511 0.1197956
## 2.5% 97.5%
## 0.09850563 0.11377120
## 2.5% 97.5%
## 0.0932918 0.1073594
## 2.5% 97.5%
## 0.08287218 0.09662925
for (j in 1:5) cat(j,as.numeric(quantile(eigenvalues[,j],
c(0.025,0.975))),"\n" )
## 1 24.86513 26.0097
## 2 1.933884 2.245744
## 3 1.143758 1.385539
## 4 0.9959979 1.145383
## 5 0.6867921 0.8053297
pca <- princomp(golub, center = TRUE, cor=TRUE, scores=TRUE)
## Warning: In princomp.default(golub, center = TRUE, cor = TRUE, scores = TRUE) :
## extra argument 'center' will be disregarded
o <- order(pca$scores[,2])
golub.gnames[o[1:10],2]
## [1] "TCL1 gene (T cell leukemia) extracted from H.sapiens mRNA for Tcell leukemia/lymphoma 1"
## [2] "CD24 signal transducer mRNA and 3' region"
## [3] "GB DEF = (lambda) DNA for immunoglobin light chain"
## [4] "MB-1 gene"
## [5] "Terminal transferase mRNA"
## [6] "IGB Immunoglobulin-associated beta (B29)"
## [7] "C-myb gene extracted from Human (c-myb) gene, complete primary cds, and five complete alternatively spliced cds"
## [8] "Adenosine triphosphatase, calcium"
## [9] "RETINOBLASTOMA BINDING PROTEIN P48"
## [10] "Cytoplasmic dynein light chain 1 (hdlc1) mRNA"
golub.gnames[o[3041:3051],2]
## [1] "GB DEF = Cystic fibrosis antigen mRNA"
## [2] "CYSTATIN A"
## [3] "LYZ Lysozyme"
## [4] "MACROPHAGE INFLAMMATORY PROTEIN 1-ALPHA PRECURSOR"
## [5] "GRO2 GRO2 oncogene"
## [6] "Azurocidin gene"
## [7] "LGALS3 Lectin, galactoside-binding, soluble, 3 (galectin 3) (NOTE: redefinition of symbol)"
## [8] "DF D component of complement (adipsin)"
## [9] "CST3 Cystatin C (amyloid angiopathy and cerebral hemorrhage)"
## [10] "Interleukin 8 (IL8) gene"
## [11] "INTERLEUKIN-8 PRECURSOR"
biplot(princomp(data,cor=TRUE),pc.biplot=TRUE,cex=0.5,expand=0.8)

data(golub, package = "multtest")
factor <- factor(golub.cl)
o1 <- grep("CD",golub.gnames[,2])
o2 <- grep("Op",golub.gnames[,2])
o3 <- grep("MCM",golub.gnames[,2])
o <- c(o1,o2,o3)
pt <- apply(golub, 1, function(x) t.test(x ~ gol.fac)$p.value)
oo <- o[pt[o]<0.01]
Z <- as.matrix(scale(golub, center = TRUE, scale = TRUE))
K <- eigen(cor(Z))
P <- Z %*% -K$vec[,1:2]
leu <- data.frame(P[oo,], row.names= oo)
attach(leu)
cl <- hclust(dist(leu,method="euclidian"),method="single")
plot(cl)

a <- as.integer(rownames(leu)[cl$order])
for (i in 1:length(a)) cat(a[i],golub.gnames[a[i],2],"\n")
## 1910 FCGR2B Fc fragment of IgG, low affinity IIb, receptor for (CD32)
## 2874 GB DEF = Fas (Apo-1, CD95)
## 2350 SLC6A8 gene (creatine transporter) extracted from Human Xq28 cosmid, creatine transporter (SLC6A8) gene, and CDM gene, partial cds
## 1737 TFRC Transferrin receptor (p90, CD71)
## 1911 ME491 gene extracted from H.sapiens gene for Me491/CD63 antigen
## 2749 ITGAX Integrin, alpha X (antigen CD11C (p150), alpha polypeptide)
## 2761 CD36 CD36 antigen (collagen type I receptor, thrombospondin receptor)
## 792 ANPEP Alanyl (membrane) aminopeptidase (aminopeptidase N, aminopeptidase M, microsomal aminopeptidase, CD13)
## 2321 ICAM1 Intercellular adhesion molecule 1 (CD54), human rhinovirus receptor
## 68 PRKCD Protein kinase C, delta
## 808 CD33 CD33 antigen (differentiation antigen)
## 2611 Cell division control related protein (hCDCrel-1) mRNA
## 450 Opioid-Binding Cell Adhesion Molecule
## 504 CD3Z CD3Z antigen, zeta polypeptide (TiT3 complex)
## 313 CD38 CD38 antigen (p45)
## 1756 CD3G CD3G antigen, gamma polypeptide (TiT3 complex)
## 893 CD72 CD72 antigen
## 182 NT5 5' nucleotidase (CD73)
## 2653 CD2 CD2 antigen (p50), sheep red blood cell receptor
## 2289 MCM3 Minichromosome maintenance deficient (S. cerevisiae) 3
## 2430 MCM3 Minichromosome maintenance deficient (S. cerevisiae) 3
## 316 P105MCM mRNA
## 1616 KAI1 Kangai 1 (suppression of tumorigenicity 6, prostate; CD82 antigen (R2 leukocyte antigen, antigen detected by monoclonal and antibody IA4))
## 1101 CDC10 Cell division cycle 10 (homologous to CDC10 of S. cerevisiae
## 2673 CD19 gene
## 2459 CD24 signal transducer mRNA and 3' region
## 892 CD9 CD9 antigen
## 1882 CD22 CD22 antigen
## 885 CD53 CD53 antigen
## 849 Oncoprotein 18 (Op18) gene
## 2397 T-CELL ANTIGEN CD7 PRECURSOR
## 1754 Catalase (EC 1.11.1.6) 5'flank and exon 1 mapping to chromosome 11, band p13 (and joined CDS)
## 1798 CD37 CD37 antigen
## 2233 GB DEF = CD36 gene exon 15
data(golub, package = "multtest")
gol.true <- factor(golub.cl,levels=0:1,labels= c(" ALL","not ALL"))
gol.pred <- factor(golub[1042,]>1.27,levels=c("TRUE","FALSE"),
labels=c("ALL","notALL"))
table(gol.pred,gol.true)
## gol.true
## gol.pred ALL not ALL
## ALL 25 1
## notALL 2 10
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:multtest':
##
## wapply
## The following object is masked from 'package:stats':
##
## lowess
gol.true <- factor(golub.cl,levels=0:1,labels= c("TRUE","FALSE"))
pred <- prediction(golub[1042,], gol.true)
perf <- performance(pred, "tpr", "fpr" )
plot(perf)

set.seed(123); n<-10 ; sigma <- 0.5
fac <- factor(c(rep(1,n),rep(2,n),rep(3,n)))
levels(fac) <- c("ALL1","ALL2","AML")
geneA <- c(rnorm(10,0,sigma),rnorm(10,2,sigma),rnorm(10,4,sigma))
dat <- data.frame(fac,geneA)
library(rpart)
rp <- rpart(fac ~ geneA, method="class",data=dat)
plot(rp, branch=0,margin=0.1); text(rp, digits=3, use.n=TRUE)

set.seed(123)
n<-10 ; sigma <- 0.5
fac <- factor(c(rep(1,n),rep(2,n),rep(3,n)))
levels(fac) <- c("ALL1","ALL2","AML")
geneA <- c(rnorm(20,0,sigma),rnorm(10,2,sigma))
geneB <- c(rnorm(10,0,sigma),rnorm(20,2,sigma))
geneC <- c(rnorm(30,1,sigma))
dat <- data.frame(fac,geneA,geneB,geneC)
library(rpart)
rp <- rpart(fac ~ geneA + geneB + geneC, method="class",data=dat)
summary(rp)
## Call:
## rpart(formula = fac ~ geneA + geneB + geneC, data = dat, method = "class")
## n= 30
##
## CP nsplit rel error xerror xstd
## 1 0.50 0 1 1.35 0.08215838
## 2 0.01 2 0 0.85 0.13570802
##
## Variable importance
## geneB geneA geneC
## 45 39 15
##
## Node number 1: 30 observations, complexity param=0.5
## predicted class=ALL1 expected loss=0.6666667 P(node) =1
## class counts: 10 10 10
## probabilities: 0.333 0.333 0.333
## left son=2 (10 obs) right son=3 (20 obs)
## Primary splits:
## geneB < 0.8365932 to the left, improve=10.000000, (0 missing)
## geneA < 1.025055 to the left, improve=10.000000, (0 missing)
## geneC < 0.7845772 to the left, improve= 2.159091, (0 missing)
## Surrogate splits:
## geneA < 0.04529778 to the left, agree=0.767, adj=0.3, (0 split)
##
## Node number 2: 10 observations
## predicted class=ALL1 expected loss=0 P(node) =0.3333333
## class counts: 10 0 0
## probabilities: 1.000 0.000 0.000
##
## Node number 3: 20 observations, complexity param=0.5
## predicted class=ALL2 expected loss=0.5 P(node) =0.6666667
## class counts: 0 10 10
## probabilities: 0.000 0.500 0.500
## left son=6 (10 obs) right son=7 (10 obs)
## Primary splits:
## geneA < 1.025055 to the left, improve=10.000000, (0 missing)
## geneB < 1.96844 to the left, improve= 2.525253, (0 missing)
## geneC < 0.8736851 to the left, improve= 1.666667, (0 missing)
## Surrogate splits:
## geneB < 1.96844 to the left, agree=0.75, adj=0.5, (0 split)
## geneC < 0.7845772 to the left, agree=0.75, adj=0.5, (0 split)
##
## Node number 6: 10 observations
## predicted class=ALL2 expected loss=0 P(node) =0.3333333
## class counts: 0 10 0
## probabilities: 0.000 1.000 0.000
##
## Node number 7: 10 observations
## predicted class=AML expected loss=0 P(node) =0.3333333
## class counts: 0 0 10
## probabilities: 0.000 0.000 1.000
library(rpart);data(golub); library(multtest)
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
gol.rp <- rpart(gol.fac ~ golub[1042,] , method="class")
predictedclass <- predict(gol.rp, type="class")
table(predictedclass, gol.fac)
## gol.fac
## predictedclass ALL AML
## ALL 25 1
## AML 2 10
summary(gol.rp)
## Call:
## rpart(formula = gol.fac ~ golub[1042, ], method = "class")
## n= 38
##
## CP nsplit rel error xerror xstd
## 1 0.7272727 0 1.0000000 1.0000000 0.2541521
## 2 0.0100000 1 0.2727273 0.3636364 0.1719828
##
## Variable importance
## golub[1042, ]
## 100
##
## Node number 1: 38 observations, complexity param=0.7272727
## predicted class=ALL expected loss=0.2894737 P(node) =1
## class counts: 27 11
## probabilities: 0.711 0.289
## left son=2 (26 obs) right son=3 (12 obs)
## Primary splits:
## golub[1042, ] < 1.198515 to the right, improve=10.37517, (0 missing)
##
## Node number 2: 26 observations
## predicted class=ALL expected loss=0.03846154 P(node) =0.6842105
## class counts: 25 1
## probabilities: 0.962 0.038
##
## Node number 3: 12 observations
## predicted class=AML expected loss=0.1666667 P(node) =0.3157895
## class counts: 2 10
## probabilities: 0.167 0.833
predict(gol.rp,type="class")
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL ALL AML ALL
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## ALL ALL AML ALL ALL ALL ALL ALL ALL AML ALL AML AML AML AML AML AML AML
## 37 38
## AML AML
## Levels: ALL AML
predict(gol.rp, type="prob")
## ALL AML
## 1 0.9615385 0.03846154
## 2 0.9615385 0.03846154
## 3 0.9615385 0.03846154
## 4 0.9615385 0.03846154
## 5 0.9615385 0.03846154
## 6 0.9615385 0.03846154
## 7 0.9615385 0.03846154
## 8 0.9615385 0.03846154
## 9 0.9615385 0.03846154
## 10 0.9615385 0.03846154
## 11 0.9615385 0.03846154
## 12 0.9615385 0.03846154
## 13 0.9615385 0.03846154
## 14 0.9615385 0.03846154
## 15 0.9615385 0.03846154
## 16 0.9615385 0.03846154
## 17 0.1666667 0.83333333
## 18 0.9615385 0.03846154
## 19 0.9615385 0.03846154
## 20 0.9615385 0.03846154
## 21 0.1666667 0.83333333
## 22 0.9615385 0.03846154
## 23 0.9615385 0.03846154
## 24 0.9615385 0.03846154
## 25 0.9615385 0.03846154
## 26 0.9615385 0.03846154
## 27 0.9615385 0.03846154
## 28 0.1666667 0.83333333
## 29 0.9615385 0.03846154
## 30 0.1666667 0.83333333
## 31 0.1666667 0.83333333
## 32 0.1666667 0.83333333
## 33 0.1666667 0.83333333
## 34 0.1666667 0.83333333
## 35 0.1666667 0.83333333
## 36 0.1666667 0.83333333
## 37 0.1666667 0.83333333
## 38 0.1666667 0.83333333
library(rpart);data(golub); library(multtest)
row.names(golub)<- paste("gene", 1:3051, sep = "")
goldata <- data.frame(t(golub[1:3051,]))
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
gol.rp <- rpart(gol.fac~., data=goldata, method="class", cp=0.001)
plot(gol.rp, branch=0,margin=0.1); text(gol.rp, digits=3, use.n=TRUE)

golub.gnames[896,]
## [1] "2020" "FAH Fumarylacetoacetate"
## [3] "M55150_at"
library(faraway)
##
## Attaching package: 'faraway'
## The following object is masked from 'package:rpart':
##
## solder
logitmod <- glm((-golub.cl + 1) ~ golub[1042,],
family=binomial(link = "logit"))
pchisq(deviance(logitmod),df.residual(logitmod),lower=FALSE)
## [1] 0.99385
plot((-golub.cl + 1) ~ golub[1042,], xlim=c(-2,5), ylim = c(0,1),
xlab="CCND3 expression values ", ylab="Probability of ALL")
x <- seq(-2,5,.1)
lines(x,ilogit(-4.844124 + 4.439953*x))

pchisq(deviance(logitmod),df.residual(logitmod),lower=FALSE)
## [1] 0.99385
summary(logitmod)
##
## Call:
## glm(formula = (-golub.cl + 1) ~ golub[1042, ], family = binomial(link = "logit"))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8864 -0.1305 0.1371 0.2707 2.3950
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.844 1.849 -2.620 0.00880 **
## golub[1042, ] 4.440 1.488 2.984 0.00284 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 45.728 on 37 degrees of freedom
## Residual deviance: 18.270 on 36 degrees of freedom
## AIC: 22.27
##
## Number of Fisher Scoring iterations: 6
pred <- predict(logitmod,type="response") > 0.5
pred.fac <- factor(pred,levels=c(TRUE,FALSE),labels=c("ALL","not ALL"))
table(pred.fac,gol.fac)
## gol.fac
## pred.fac ALL AML
## ALL 26 2
## not ALL 1 9
library(seqinr)
##
## Attaching package: 'seqinr'
## The following object is masked from 'package:limma':
##
## zscore
## The following objects are masked from 'package:ape':
##
## as.alignment, consensus
choosebank()
## [1] "genbank" "embl" "emblwgs"
## [4] "swissprot" "ensembl" "hogenom7"
## [7] "hogenom" "hogenomdna" "hovergendna"
## [10] "hovergen" "hogenom5" "hogenom5dna"
## [13] "hogenom4" "hogenom4dna" "homolens"
## [16] "homolensdna" "hobacnucl" "hobacprot"
## [19] "phever2" "phever2dna" "refseq"
## [22] "greviews" "bacterial" "archaeal"
## [25] "protozoan" "ensprotists" "ensfungi"
## [28] "ensmetazoa" "ensplants" "ensemblbacteria"
## [31] "mito" "polymorphix" "emglib"
## [34] "refseqViruses" "ribodb" "taxodb"
choosebank("genbank")
query("ccnd","k=ccnd",virtual=TRUE)$nelem
## [1] 7
query("ccnd3hs","sp=homo sapiens AND k=ccnd3",virtual=TRUE)$nelem
## [1] 6
library(seqinr)
x <- s2c("GAATTC"); y <- s2c("GATTA"); d <- 2
s <- matrix(data=NA,nrow=length(y),ncol=length(x))
for (i in 1:(nrow(s))) for (j in 1:(ncol(s)))
{if (y[i]==x[j]) s[i,j]<- 2 else s[i,j]<- -1 }
rownames(s) <- c(y); colnames(s) <- c(x)
s
## G A A T T C
## G 2 -1 -1 -1 -1 -1
## A -1 2 2 -1 -1 -1
## T -1 -1 -1 2 2 -1
## T -1 -1 -1 2 2 -1
## A -1 2 2 -1 -1 -1
F <- matrix(data=NA,nrow=(length(y)+1),ncol=(length(x)+1))
rownames(F) <- c("",y); colnames(F) <- c("",x)
F[,1] <- -seq(0,length(y)*d,d); F[1,] <- -seq(0,length(x)*d,d)
for (i in 2:(nrow(F)))
for (j in 2:(ncol(F)))
{F[i,j] <- max(c(F[i-1,j-1]+s[i-1,j-1],F[i-1,j]-d,F[i,j-1]-d))}
F
## G A A T T C
## 0 -2 -4 -6 -8 -10 -12
## G -2 2 0 -2 -4 -6 -8
## A -4 0 4 2 0 -2 -4
## T -6 -2 2 3 4 2 0
## T -8 -4 0 1 5 6 4
## A -10 -6 -2 2 3 4 5
library(seqinr);library(Biostrings);data(BLOSUM50)
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:gplots':
##
## space
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:seqinr':
##
## translate
## The following object is masked from 'package:ape':
##
## complement
## The following object is masked from 'package:base':
##
## strsplit
x <- s2c("HEAGAWGHEE"); y <- s2c("PAWHEAE"); s <- BLOSUM50[y,x]; d <- 8
F <- matrix(data=NA,nrow=(length(y)+1),ncol=(length(x)+1))
F[1,] <- -seq(0,80,8); F[,1] <- -seq(0,56,8)
rownames(F) <- c("",y); colnames(F) <- c("",x)
for (i in 2:(nrow(F)))
for (j in 2:(ncol(F)))
{F[i,j] <- max(c(F[i-1,j-1]+s[i-1,j-1],F[i-1,j]-d,F[i,j-1]-d))}
F
## H E A G A W G H E E
## 0 -8 -16 -24 -32 -40 -48 -56 -64 -72 -80
## P -8 -2 -9 -17 -25 -33 -41 -49 -57 -65 -73
## A -16 -10 -3 -4 -12 -20 -28 -36 -44 -52 -60
## W -24 -18 -11 -6 -7 -15 -5 -13 -21 -29 -37
## H -32 -14 -18 -13 -8 -9 -13 -7 -3 -11 -19
## E -40 -22 -8 -16 -16 -9 -12 -15 -7 3 -5
## A -48 -30 -16 -3 -11 -11 -12 -12 -15 -5 2
## E -56 -38 -24 -11 -6 -12 -14 -15 -12 -9 1
library(Biostrings);data(BLOSUM50)
pairwiseAlignment(AAString("PAWHEAE"), AAString("HEAGAWGHEE"),
substitutionMatrix = "BLOSUM50",gapOpening = 0, gapExtension = -8,
scoreOnly = FALSE)
## Global PairwiseAlignmentsSingleSubject (1 of 1)
## pattern: [1] PA--W-HEAE
## subject: [2] EAGAWGHE-E
## score: 1
library(seqinr);library(Biostrings);data(BLOSUM50)
randallscore <- double()
for (i in 1:1000) {
x <- c2s(sample(rownames(BLOSUM50),7, replace=TRUE))
y <- c2s(sample(rownames(BLOSUM50),10, replace=TRUE))
randallscore[i] <- pairwiseAlignment(AAString(x), AAString(y),
substitutionMatrix = "BLOSUM50",gapOpening = 0, gapExtension = -8,
scoreOnly = TRUE)
}
sum(randallscore>1)/1000
## [1] 0.003
markov1 <- function(x,P,n){ seq <- x
for(k in 1:(n-1)){
seq[k+1] <- sample(x, 1, replace=TRUE, P[seq[k],])}
return(seq)
}
P <- matrix(c(1/6,5/6,0.5,0.5), 2, 2, byrow=TRUE)
rownames(P) <- colnames(P) <- StateSpace <- x <- c(1,2)
markov1(x,P,30)
## [1] 1 2 2 1 2 1 2 1 2 1 2 1 2 1 2 2 1 2 1 1 2 2 1 1 2 1 2 2 2 1
markov2 <- function(StateSpace,P,pi0,n){
seq <- character(n)
seq[1] <- sample(StateSpace, 1, replace=TRUE, pi0)
for(k in 1:(n-1)){
seq[k+1] <- sample(StateSpace, 1, replace=TRUE, P[seq[k],])}
return(seq)
}
P <- matrix(c(
1/6,5/6,0,0,
1/8,2/4,1/4,1/8,
0,2/6,3/6,1/6,
0,1/6,3/6,2/6),4,4,byrow=TRUE)
rownames(P) <- colnames(P) <- StateSpace <- c("a","c","g","t")
pi0 <- c(1/4,1/4,1/4,1/4)
x <- markov2(StateSpace,P,pi0,1000)
table(x)
## x
## a c g t
## 62 418 371 149
pi0 <- c(1/4,1/4,1/4,1/4)
P <- matrix(c(.01,.01,.01,.97,
.01,.01,.01,.97,
.01,.01,.01,.97,
.01,.28,.01,0.70),4,4,byrow=T)
rownames(P) <- colnames(P) <- StateSpace <- c("a","c","g","t")
x <- markov2(StateSpace,P,pi0,30000)
table(getTrans(x))
##
## * A C D F H I L M N P R S T V
## 1 1 68 2 5298 16 103 2140 1 1 21 23 2158 1 99
## W Y
## 2 65
nr <- count(x,2)
names(nr)
## [1] "aa" "ac" "ag" "at" "ca" "cc" "cg" "ct" "ga" "gc" "gg" "gt" "ta" "tc"
## [15] "tg" "tt"
A <- matrix(NA,4,4)
A[1,1]<-nr["aa"]; A[1,2]<-nr["ag"]; A[1,3]<-nr["ac"]; A[1,4]<-nr["at"]
A[2,1]<-nr["ga"]; A[2,2]<-nr["gg"]; A[2,3]<-nr["gc"]; A[2,4]<-nr["gt"]
A[3,1]<-nr["ca"]; A[3,2]<-nr["cg"]; A[3,3]<-nr["cc"]; A[3,4]<-nr["ct"]
A[4,1]<-nr["ta"]; A[4,2]<-nr["tg"]; A[4,3]<-nr["tc"]; A[4,4]<-nr["tt"]
rowsumA <- apply(A, 1, sum)
Phat <- sweep(A, 1, rowsumA, FUN="/")
rownames(Phat) <- colnames(Phat) <- c("a","g","c","t")
round(Phat,3)
## a g c t
## a 0.013 0.006 0.016 0.965
## g 0.014 0.014 0.014 0.958
## c 0.009 0.013 0.010 0.969
## t 0.011 0.009 0.276 0.705
P <- matrix(c(5/6,1/6,0.5,0.5),2,2,byrow=T)
pi0 <- c(2/3,1/3)
pi0 %*% P
## [,1] [,2]
## [1,] 0.7222222 0.2777778
P %*% P
## [,1] [,2]
## [1,] 0.7777778 0.2222222
## [2,] 0.6666667 0.3333333
P <- matrix(c(1/6,5/6,0.5,0.5),2,2,byrow=T)
V <- eigen(P,symmetric = FALSE)
V$values
## [1] 1.0000000 -0.3333333
V$vectors
## [,1] [,2]
## [1,] -0.7071068 -0.8574929
## [2,] -0.7071068 0.5144958
V$vec %*% diag(V$va)^(16) %*% solve(V$vec)
## [,1] [,2]
## [1,] 0.375 0.625
## [2,] 0.375 0.625
P <- matrix(c(1,0,0, 1/4,1/2,1/4,0,0,1),3,3,byrow=T)
V <- eigen(P,symmetric = FALSE)
V$vec %*% diag(V$va)^(5) %*% solve(V$vec)
## [,1] [,2] [,3]
## [1,] 1.000000 0.00000 0.000000
## [2,] 0.484375 0.03125 0.484375
## [3,] 0.000000 0.00000 1.000000
library(Matrix)
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
Q <- 0.2 * Matrix(c(-3,1,1,1,1,-3,1,1,1,1,-3,1,1,1,1,-3),4)
rownames(Q) <- colnames(Q) <- c("A","G","C","T")
P <- as.matrix(expm(Q))
round(P,2)
## A G C T
## A 0.59 0.14 0.14 0.14
## G 0.14 0.59 0.14 0.14
## C 0.14 0.14 0.59 0.14
## T 0.14 0.14 0.14 0.59
alpha <- 3/6; beta <- 2/6; gamma<- 1/6; Q <- matrix(data=NA,4,4)
Q[1,2] <- Q[2,1] <- Q[3,4] <- Q[4,3] <- alpha
Q[1,3] <- Q[3,1] <- Q[2,4] <- Q[4,2] <- beta
Q[1,4] <- Q[4,1] <- Q[2,3] <- Q[3,2] <- gamma
diag(Q) <- -(alpha + beta + gamma)
Q
## [,1] [,2] [,3] [,4]
## [1,] -1.0000000 0.5000000 0.3333333 0.1666667
## [2,] 0.5000000 -1.0000000 0.1666667 0.3333333
## [3,] 0.3333333 0.1666667 -1.0000000 0.5000000
## [4,] 0.1666667 0.3333333 0.5000000 -1.0000000
Q <- Matrix(Q)
P <- as.matrix(expm(Q))
P
## [,1] [,2] [,3] [,4]
## [1,] 0.4550880 0.2288517 0.1767105 0.1393498
## [2,] 0.2288517 0.4550880 0.1393498 0.1767105
## [3,] 0.1767105 0.1393498 0.4550880 0.2288517
## [4,] 0.1393498 0.1767105 0.2288517 0.4550880
library(Matrix)
alpha <- 1/5; Q <- matrix(rep(alpha,16),4,4)
diag(Q) <- -3 * alpha
Q <- Matrix(Q)
P <- as.matrix(expm(Q))
V <- eigen(P,symmetric = FALSE)
V$vec %*% diag(V$va)^(50) %*% solve(V$vec)
## [,1] [,2] [,3] [,4]
## [1,] 0.25 0.25 0.25 0.25
## [2,] 0.25 0.25 0.25 0.25
## [3,] 0.25 0.25 0.25 0.25
## [4,] 0.25 0.25 0.25 0.25
library(ape);library(seqinr)
accnr <- paste("AJ5345",26:27,sep="")
seqbin <- read.GenBank(accnr, species.names = TRUE, as.character = FALSE)
dist.dna(seqbin, model = "JC69")
## AJ534526
## AJ534527 0.1326839
library(ape);library(seqinr)
accnr <- paste("AJ5345",26:35,sep="")
seq <- read.GenBank(accnr)
names(seq) <- attr(seq, "species")
dist <- dist.dna(seq, model = "K80")
plot(nj(dist))

data(golub, package="multtest")
rowindex <- agrep("^oncogene",golub.gnames[,2])
oncogol <- golub[rowindex,]
oncogolub.gnames <- golub.gnames[rowindex,]
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
meangol <- apply(oncogol[,gol.fac=="ALL"],1,mean)
o <- order(meangol,decreasing=TRUE)
oncogolub.gnames[o[1:3],2]
## [1] "Cellular oncogene c-fos (complete sequence)"
## [2] "GB DEF = VAV oncogene"
## [3] "FYN FYN oncogene related to SRC, FGR, YES"
meangol <- apply(oncogol[,gol.fac=="AML"],1,mean)
o <- order(meangol,decreasing=TRUE)
oncogolub.gnames[o[1:3],2]
## [1] "PIM1 Pim-1 oncogene" "JUNB Jun B proto-oncogene"
## [3] "Proto-oncogene BCL3 gene"
data(golub, package="multtest")
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
efs <- apply(golub[,gol.fac=="ALL"],1,function(x) mean(x)/sd(x))
o <- order(efs,decreasing=TRUE)
efs[o[1:5]]
## [1] 11.138128 10.638308 9.155108 8.954115 8.695353
golub.gnames[o[1:5],2]
## [1] "YWHAZ Tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein, zeta polypeptide"
## [2] "ZNF91 Zinc finger protein 91 (HPF7, HTF10)"
## [3] "HnRNP-E2 mRNA"
## [4] "54 kDa protein mRNA"
## [5] "Immunophilin homolog ARA9 mRNA"
refs <- apply(golub[,gol.fac=="ALL"],1,function(x) median(x)/mad(x))
o <- order(refs,decreasing=TRUE)
refs[o[1:5]]
## [1] 14.51217 13.57425 13.27698 13.14419 12.91608
golub.gnames[o[1:5],2]
## [1] "COX6B gene (COXG) extracted from Human DNA from overlapping chromosome 19 cosmids R31396, F25451, and R31076 containing COX6B and UPKA, genomic sequence"
## [2] "AFFX-HSAC07/X00351_M_at (endogenous control)"
## [3] "ATP5A1 ATP synthase, H+ transporting, mitochondrial F1 complex, alpha subunit, isoform 1, cardiac muscle"
## [4] "ATP SYNTHASE GAMMA CHAIN, MITOCHONDRIAL PRECURSOR"
## [5] "YWHAZ Tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein, zeta polypeptide"
data(golub, package = "multtest")
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
stripchart(golub[1042,] ~ gol.fac,method="jitter")

stripchart(golub[1042,] ~ gol.fac,method="jitter",vertical = TRUE)

stripchart(golub[1042,] ~ gol.fac,method="jitter",col=c("red", "blue"),
vertical = TRUE)

stripchart(golub[1042,] ~ gol.fac,method="jitter",col=c("red", "blue"),
pch="*",vertical = TRUE)
title("CCND3 Cyclin D3 expression value for ALL and AMl patients")

an <- sqrt(2*log(n)) - 0.5*(log(log(n))+log(4*pi))*(2*log(n))^(-1/2)
bn <- (2*log(n))^(-1/2)
e <- double(); n <- 10000 # Serfling p.90
for (i in 1:1000) e[i] <- (max(rnorm(n))-an)/bn
plot(density(e),ylim=c(0,0.5))
f<-function(x){exp(-x)*exp(-exp(-x))}
curve(f,range(density(e)$x),add=TRUE,col = "blue")
curve(dnorm,add=TRUE,col = "red")

library(multtest);data(golub)
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
shapiro.test(golub[i,gol.fac=="ALL"])
##
## Shapiro-Wilk normality test
##
## data: golub[i, gol.fac == "ALL"]
## W = 0.9667, p-value = 0.5173
library(multtest); data(golub)
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
pt <- apply(golub, 1, function(x) t.test(x ~ gol.fac)$p.value)
index <-agrep("^antigen",golub.gnames[,2])
golub.index<-golub[index,]
pt.index<-pt[index]
golub.gnames.index<-golub.gnames[index,]
golub.gnames.index[order(pt.index)[1:length(index)],2]
## [1] "ME491 gene extracted from H.sapiens gene for Me491/CD63 antigen"
## [2] "CD22 CD22 antigen"
## [3] "ITGAX Integrin, alpha X (antigen CD11C (p150), alpha polypeptide)"
## [4] "CD33 CD33 antigen (differentiation antigen)"
## [5] "CD38 CD38 antigen (p45)"
## [6] "CD37 CD37 antigen"
## [7] "CD2 CD2 antigen (p50), sheep red blood cell receptor"
## [8] "APS Prostate specific antigen"
## [9] "GB DEF = T-cell antigen receptor gene T3-delta"
## [10] "PCNA Proliferating cell nuclear antigen"
## [11] "KAI1 Kangai 1 (suppression of tumorigenicity 6, prostate; CD82 antigen (R2 leukocyte antigen, antigen detected by monoclonal and antibody IA4))"
## [12] "CD36 CD36 antigen (collagen type I receptor, thrombospondin receptor)"
## [13] "CD72 CD72 antigen"
## [14] "CD3Z CD3Z antigen, zeta polypeptide (TiT3 complex)"
## [15] "CD53 CD53 antigen"
## [16] "CD3G CD3G antigen, gamma polypeptide (TiT3 complex)"
## [17] "Autoantigen DFS70 mRNA, partial cds"
## [18] "CD9 CD9 antigen"
## [19] "CD58 CD58 antigen, (lymphocyte function-associated antigen 3)"
## [20] "CALR Autoantigen calreticulin"
## [21] "GB DEF = CD1 R2 gene for MHC-related antigen"
## [22] "ITGA4 Integrin, alpha 4 (antigen CD49D, alpha 4 subunit of VLA-4 receptor)"
## [23] "CD1B CD1b antigen (thymocyte antigen)"
## [24] "CD48 CD48 antigen (B-cell membrane protein)"
## [25] "Autoantigen p542 mRNA, 3' end of cds"
## [26] "CD69 CD69 antigen (early T cell activation antigen)"
## [27] "Hepatitis delta antigen interacting protein A (dipA) mRNA"
## [28] "GB DEF = Cystic fibrosis antigen mRNA"
## [29] "SSB Sjogren syndrome antigen B (autoantigen La)"
## [30] "NASP Nuclear autoantigenic sperm protein (histone-binding)"
## [31] "GB DEF = B-lymphocyte cell-surface antigen B1 (CD20)"
## [32] "PRTN3 Proteinase 3 (serine proteinase, neutrophil, Wegener granulomatosis autoantigen)"
## [33] "Renal cell carcinoma antigen RAGE-4 mRNA, complete putative cds"
## [34] "CDW52 CDW52 antigen (CAMPATH-1 antigen)"
## [35] "62 kDa paraneoplastic antigen mRNA, 3' end"
## [36] "CD59 CD59 antigen p18-20 (antigen identified by monoclonal antibodies 16.3A5, EJ16, EJ30, EL32 and G344)"
## [37] "Thyroid autoantigen (truncated actin-binding protein) mRNA"
## [38] "ITGAM Integrin, alpha M (complement component receptor 3, alpha; also known as CD11b (p170), macrophage antigen alpha polypeptide)"
## [39] "MAGE-11 antigen (MAGE11) gene"
## [40] "CD40 CD40 antigen"
## [41] "TRA1 Homologue of mouse tumor rejection antigen gp96"
## [42] "CD27 CD27 antigen (T cell activation antigen)"
## [43] "CD47 CD47 antigen (Rh-related antigen, integrin-associated signal transducer)"
## [44] "G22P1 Thyroid autoantigen 70kD (Ku antigen)"
## [45] "GB DEF = Proliferating cell nuclear antigen (PCNA) gene, promoter region"
## [46] "Melanoma antigen p15 mRNA"
## [47] "Breast tumor autoantigen mRNA, complete sequence"
## [48] "PECAM1 Platelet/endothelial cell adhesion molecule (CD31 antigen)"
## [49] "Golgi complex autoantigen golgin-97 mRNA"
## [50] "CGM7 Carcinoembryonic antigen gene family member 7"
## [51] "ITGA2B Integrin, alpha 2b (platelet glycoprotein IIb of IIb/IIIa complex, antigen CD41B)"
## [52] "Autoantigen mRNA"
## [53] "CD14 CD14 antigen"
## [54] "ITGAL Integrin, alpha L (antigen CD11A (p180), lymphocyte function-associated antigen 1; alpha polypeptide)"
## [55] "HLA-E MHC class I antigen HLA-E"
## [56] "Surface antigen mRNA"
## [57] "ITGB1 Integrin, beta 1 (fibronectin receptor, beta polypeptide, antigen CD29 includes MDF2, MSK12)"
## [58] "CD1C CD1c antigen (thymocyte antigen)"
## [59] "CD70 CD70 antigen (CD27 ligand)"
## [60] "Golgi antigen gcp372"
## [61] "CD5 CD5 antigen (p56-62)"
## [62] "CD97 CD97 antigen (leucocyte antigen)"
## [63] "KAI1 Kangai 1 (suppression of tumorigenicity 6, prostate; CD82 antigen (R2 leukocyte antigen, antigen detected by monoclonal and antibody IA4))"
## [64] "CD40LG CD40 antigen ligand (hyper IgM syndrome)"
## [65] "Autoantigen pericentriol material 1 (PCM-1) mRNA"
## [66] "ANX11 Annexin XI (56kD autoantigen)"
## [67] "Class I histocompatibility antigen-like protein mRNA"
## [68] "HLA-SB alpha gene (class II antigen) extracted from Human HLA-SB(DP) alpha gene"
## [69] "MAGE-5a antigen (MAGE5a) gene"
## [70] "A33 antigen precursor mRNA"
## [71] "CD34 CD34 antigen (hemopoietic progenitor cell antigen)"
## [72] "CD1A CD1a antigen (thymocyte antigen)"
## [73] "Nucleolar autoantigen No55 mRNA"
## [74] "Breast epithelial antigen BA46 mRNA"
## [75] "CD8A CD8 antigen, alpha polypeptide (p32)"
all66 <- golub[66,gol.fac=="ALL"]
all790 <- golub[790,gol.fac=="ALL"]
boxplot(all66,all790)

mean(all66);mean(all790)
## [1] 1.182503
## [1] -1.174024
median(all66);median(all790)
## [1] 1.23023
## [1] -1.28137
sd(all66);sd(all790)
## [1] 0.4408175
## [1] 0.4809953
IQR(all66)/1.349 ;IQR(all790)/1.349
## [1] 0.4279985
## [1] 0.1532024
mean(all66);mean(all790)
## [1] 1.182503
## [1] -1.174024
mad(all66);mad(all790)
## [1] 0.4523413
## [1] 0.1864814
shapiro.test(all66);shapiro.test(all790)
##
## Shapiro-Wilk normality test
##
## data: all66
## W = 0.98698, p-value = 0.9758
##
## Shapiro-Wilk normality test
##
## data: all790
## W = 0.66224, p-value = 1.206e-06
library(multtest);data(golub)
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
allsh <- apply(golub[,gol.fac=="ALL"], 1, function(x) shapiro.test(x)$p.value)
amlsh <- apply(golub[,gol.fac=="AML"], 1, function(x) shapiro.test(x)$p.value)
100 * sum(allsh>0.05)/length(allsh)
## [1] 58.27598
100 * sum(amlsh>0.05)/length(amlsh)
## [1] 78.56441
100 * sum(allsh>0.05 & allsh>0.05)/length(allsh)
## [1] 58.27598
library("genefilter")
data(ALL, package = "ALL")
ALLB <- ALL[,ALL$BT %in% c("B1","B2","B3","B4")]
f1 <- function(x) (shapiro.test(x)$p.value > 0.05)
sel1 <- genefilter(exprs(ALL[,ALLB$BT=="B1"]), filterfun(f1))
sel2 <- genefilter(exprs(ALL[,ALLB$BT=="B2"]), filterfun(f1))
sel3 <- genefilter(exprs(ALL[,ALLB$BT=="B3"]), filterfun(f1))
sel4 <- genefilter(exprs(ALL[,ALLB$BT=="B4"]), filterfun(f1))
selected <- sel1 & sel2 & sel3 & sel4
library(limma)
x <- matrix(as.integer(c(sel2,sel3,sel4)),ncol = 3,byrow=FALSE)
colnames(x) <- c("sel2","sel3","sel4")
vc <- vennCounts(x, include="both")
vennDiagram(vc)

library("ALL")
library("limma");
allB <- ALL[,which(ALL$BT %in% c("B1","B2","B3","B4"))]
facB123 <- factor(allB$BT)
cont.ma <- makeContrasts(B2-B1,B3-B2,B4-B3, levels=facB123)
design.ma <- model.matrix(~ 0 + facB123)
colnames(design.ma) <- c("B1","B2","B3","B4")
fit <- lmFit(allB, design.ma)
fit1 <- contrasts.fit(fit, cont.ma)
fit1 <- eBayes(fit1)
topTable(fit1, coef=2,5,adjust.method="BH")
## logFC AveExpr t P.Value adj.P.Val B
## 35991_at 0.5964481 4.144598 6.624128 2.578836e-09 0.0000325578 10.842989
## 33873_at 0.5707770 7.217570 6.083524 2.891823e-08 0.0001825464 8.625253
## 35614_at 1.7248509 5.663477 5.961231 4.946078e-08 0.0002081474 8.132884
## 36711_at -2.3664712 7.576108 -5.759565 1.187487e-07 0.0003054110 7.329631
## 37902_at 0.8470235 4.258491 5.742783 1.276579e-07 0.0003054110 7.263298
library(multtest);data(golub);
gol.fac <- factor(golub.cl,levels=0:1, labels= c("ALL","AML"))
o1 <- grep("oncogene",golub.gnames[,2])
plot(hclust(dist(golub[o1,],method="euclidian"),method="single"))

o3 <- grep("receptor",golub.gnames[,2])
plot(hclust(dist(golub[o3,],method="euclidian"),method="single"))

library(ROCR); data(golub, package = "multtest")
gol.true <- factor(golub.cl,levels=0:1,labels= c("TRUE","FALSE"))
auc.values <- apply(golub,1,
function(x) performance(prediction(x, gol.true),"auc")@y.values[[1]])
o <- order(auc.values,decreasing=TRUE)
golub.gnames[o[1:25],2]
## [1] "TCF3 Transcription factor 3 (E2A immunoglobulin enhancer binding factors E12/E47)"
## [2] "Macmarcks"
## [3] "VIL2 Villin 2 (ezrin)"
## [4] "TOP2B Topoisomerase (DNA) II beta (180kD)"
## [5] "C-myb gene extracted from Human (c-myb) gene, complete primary cds, and five complete alternatively spliced cds"
## [6] "RETINOBLASTOMA BINDING PROTEIN P48"
## [7] "RB1 Retinoblastoma 1 (including osteosarcoma)"
## [8] "CCND3 Cyclin D3"
## [9] "ALDR1 Aldehyde reductase 1 (low Km aldose reductase)"
## [10] "T-COMPLEX PROTEIN 1, GAMMA SUBUNIT"
## [11] "SPTAN1 Spectrin, alpha, non-erythrocytic 1 (alpha-fodrin)"
## [12] "Inducible protein mRNA"
## [13] "Translational initiation factor 2 beta subunit (elF-2-beta) mRNA"
## [14] "NUCLEOLYSIN TIA-1"
## [15] "Putative enterocyte differentiation promoting factor mRNA, partial cds"
## [16] "IEF SSP 9502 mRNA"
## [17] "ACADM Acyl-Coenzyme A dehydrogenase, C-4 to C-12 straight chain"
## [18] "PROTEASOME IOTA CHAIN"
## [19] "X-LINKED HELICASE II"
## [20] "Stimulator of TAR RNA binding (SRB) mRNA"
## [21] "MYL1 Myosin light chain (alkali)"
## [22] "Transcriptional activator hSNF2b"
## [23] "HKR-T1"
## [24] "ADA Adenosine deaminase"
## [25] "Transcriptional activator hSNF2b"
library(multtest);library(ROCR);data(golub)
golub.clchanged <- -golub.cl +1
pred <- prediction(golub[1042,], golub.clchanged)
perf <- performance(pred, "sens", "spec")
plot(perf)

library(rpart)
predictors <- matrix(rnorm(100*4,0,1),100,4)
colnames(predictors) <- letters[1:4]
groups <- gl(2,50)
simdata <- data.frame(groups,predictors)
rp<-rpart(groups ~ a + b + c + d,method="class",data=simdata)
predicted <- predict(rp,type="class")
table(predicted,groups)
## groups
## predicted 1 2
## 1 44 19
## 2 6 31
plot(rp, branch=0,margin=0.1); text(rp, digits=3, use.n=TRUE)

table(predicted,groups)
## groups
## predicted 1 2
## 1 44 19
## 2 6 31