Bioinformatics

chisq.test(c(75,29),p = c(3/4,1/4))

## 
##  Chi-squared test for given probabilities
## 
## data:  c(75, 29)
## X-squared = 0.46154, df = 1, p-value = 0.4969

chisq.test(c(4,5),p = c(1/2,1/2))

## Warning in chisq.test(c(4, 5), p = c(1/2, 1/2)): Chi-squared approximation
## may be incorrect

## 
##  Chi-squared test for given probabilities
## 
## data:  c(4, 5)
## X-squared = 0.11111, df = 1, p-value = 0.7389

p<-(3/4)^3
cat("P(None of three letters in a codon is a cytosine) = ",p,fill = T)

## P(None of three letters in a codon is a cytosine) =  0.421875

n1<-(3/4)^3*64
cat("Number of triplet codes which can be formed not having a cytosine) = ",n1,fill = T)

## Number of triplet codes which can be formed not having a cytosine) =  27

p<-(3/4)^3
q<-1-p
n2<-q*64
cat("P(Codons having at least one cytosine = ",q,fill = T)

## P(Codons having at least one cytosine =  0.578125

cat("Codons having at least one cytosine = ",n2,fill = T)

## Codons having at least one cytosine =  37

num<-choose(4,1)*choose(8,1)
deno<-choose(15,2)
#p = Probability of drawing a red and a black ball
p = num/deno
cat("P(Drawing a red and a black ball) = ",p,fill = T)

## P(Drawing a red and a black ball) =  0.3047619

# Num = Favorable number of cases
# Deno = Exhaustive number of cases
# p = probability of getting a uniform spot
Num<-1100
Deno<-10000
p = Num/Deno
cat("P(Getting a uniform spot) = ",p,fill = T)

## P(Getting a uniform spot) =  0.11

setS<-c(1,2,3,4,5,8,10,12)
SetA<-c(1,2,3,8,10)
SetB<-c(2,4,8,12)
union(SetA,SetB)

## [1]  1  2  3  8 10  4 12

intersect(SetA,SetB)

## [1] 2 8

#n1 = number of flies, n2 = number of frogs, n3 = number of mice
#n = n1 + n2 + n3
n1<-3
n2<-2
n3<-5
n = n1 + n2 + n3
p = factorial(n)/(factorial(n1)*factorial(n2)*factorial(n3))
cat("number of ways in which three kinds of objects can be arranged = ", p,fill = T )

## number of ways in which three kinds of objects can be arranged =  2520

#n = number of mice in a cage, k = number of mice drawn from the cage
#c = Number of ways in which k mice can be drawn
n<-20
k<-5
c = choose(n,k)
cat("Number of ways in which 5 mice can be drawn from the cage = ",c,fill = T)

## Number of ways in which 5 mice can be drawn from the cage =  15504

#n = Total number of spots
# uc = Number of uniform and circular spots
# c = Number of circular spots
# u = Number of uniform spots
n<-20000
c<-15500
u<-9000
uc<-5000
p = c/n + u/n-uc/n
cat("Probability that a randomly chosen spot is acceptable = ", p, fill = T )

## Probability that a randomly chosen spot is acceptable =  0.975

# A is the event that the target cDNA will be labeled with the dye R
#B is the event that the target cDNA will hybridize with labeled dye R
PA<-0.95
PAintersectionB<-0.85
PBgivenA<-PAintersectionB/PA
cat("Probability that the labeled target cDNA hybridizes on a spot = ", PBgivenA, fill = T)

## Probability that the labeled target cDNA hybridizes on a spot =  0.8947368

# PA is the probability that gene 3030 expresses
# PB is the probability that gene 1039 expresses
# PS is the probability that both genes 3030 and 1039 express at the same time
PA<-0.55
PB<-0.85
PS<-PA*PB
cat("Probability that both genes 3030 and 1039 express at the same time
 = ", PS, fill = T)

## Probability that both genes 3030 and 1039 express at the same time
##  =  
## 0.4675

#Three microarray experiments: E1, E2, and E3
# Probability of choosing E1, E2, E3 are PE1, PE2, PE3 respectively
# Red Spot = R, Green Spot = G, Black Spot = B
#A is the event that two spots chosen are red and black

PE1<-1/3
PE2<-1/3
PE3<-1/3
# In E1: two red spots, seven green spots and five black spots, E1N: Total number of Spots

E1R<-2
E1G<-7
E1B<-5
E1N<-E1R + E1G + E1B
E1N

## [1] 14

#In E2: three red spots, nine green spots and three black spots, E2N: Total number of Spots
E2R<-3
E2G<-7
E2B<-3
E2N<-E2R + E2G + E2B
E2N

## [1] 13

# In E3: four red spots, eight green spots, and three black spots, E3N: Total number of Spots
E3R<-4
E3G<-8
E3B<-3
E3N<-E3R + E3G + E3B
E3N

## [1] 15

#PAgivenE1 = P(A|E1), PAgivenE2 = P(A|E2, PAgivenE3 = P(A|E3)
PAgivenE1<-(choose(E1R,1)*choose(E1B,1))/choose(E1N,2)
cat("P(A|E1) = ", PAgivenE1, fill = T)

## P(A|E1) =  0.1098901

PAgivenE2<-(choose(E2R,1)*choose(E2B,1))/choose(E2N,2)
cat("P(A|E2) = ", PAgivenE2, fill = T)

## P(A|E2) =  0.1153846

PAgivenE3<-(choose(E3R,1)*choose(E3B,1))/choose(E3N,2)
cat("P(A|E3) = ", PAgivenE3, fill = T)

## P(A|E3) =  0.1142857

# Deno1 = Sum(P(E_i)*P(A|E_i)), i = 1,2,3
Deno1<-(PE1*PAgivenE1 + PE2*PAgivenE2 + PE3*PAgivenE3)
cat("Sum(P(E_i)*P(A|E_i)), i = 1,2,3 = ", Deno1, fill = T )

## Sum(P(E_i)*P(A|E_i)), i = 1,2,3 =  0.1131868

#PE1givenA = P(E1|A), PE2givenA = P(E2|A), PE3givenA = P(E3|A)
PE1givenA<-PE1*PAgivenE1/Deno1
cat("P(E1|A) = ", PE1givenA, fill = T )

## P(E1|A) =  0.3236246

PE2givenA<-PE2*PAgivenE2/Deno1
cat("P(E2|A) = ", PE2givenA, fill = T )

## P(E2|A) =  0.3398058

PE3givenA<-PE3*PAgivenE3/Deno1
cat("P(E3|A) = ", PE3givenA, fill = T )

## P(E3|A) =  0.3365696

# EX = Expected Value
integrand<-function(x){(x/5)*exp(-x/5)}
EX<-integrate(integrand, lower = 0, upper = Inf)
print("EX = ")

## [1] "EX = "

EX

## 5 with absolute error < 3.8e-05

# EX = Expected Value, p[i] = Probability getting a number i
rm(list = ls())
for (i in 1:6){p<-1/6}
p

## [1] 0.1666667

for (i in 1:6){p[i]<-1/6}
p

## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667

for (i in 1:6){ex<-i*p[i]}
ex

## [1] 1

for (i in 1:6){ex[i]<-i*p[i]}
ex

## [1] 0.1666667 0.3333333 0.5000000 0.6666667 0.8333333 1.0000000

ex1<-sum (ex)
cat("E(X) = ", ex1, fill = T)

## E(X) =  3.5

# EX = Expected Value, p[i] = Probability getting a number i
rm(list = ls())
x<- c(2,3,4,5,6,7,8,9,10,11,12)
x

##  [1]  2  3  4  5  6  7  8  9 10 11 12

for (i in 1:6){p<-1/6}
p

## [1] 0.1666667

for (i in 1:6) {p[i]<-i/36}
p

## [1] 0.02777778 0.05555556 0.08333333 0.11111111 0.13888889 0.16666667

for (i in 7:11) {p[i]<-(12-i)/36}
p

##  [1] 0.02777778 0.05555556 0.08333333 0.11111111 0.13888889 0.16666667
##  [7] 0.13888889 0.11111111 0.08333333 0.05555556 0.02777778

for (i in 1:11){ex<-x*p[i]}
ex

##  [1] 0.05555556 0.08333333 0.11111111 0.13888889 0.16666667 0.19444444
##  [7] 0.22222222 0.25000000 0.27777778 0.30555556 0.33333333

for (i in 1:11){ex[i]<-x[i]*p[i]}
x

##  [1]  2  3  4  5  6  7  8  9 10 11 12

##  [1] 0.02777778 0.05555556 0.08333333 0.11111111 0.13888889 0.16666667
##  [7] 0.13888889 0.11111111 0.08333333 0.05555556 0.02777778

ex

##  [1] 0.05555556 0.16666667 0.33333333 0.55555556 0.83333333 1.16666667
##  [7] 1.11111111 1.00000000 0.83333333 0.61111111 0.33333333

ex1<-sum(ex)
cat("E(X) = ", ex1, fill = T )

## E(X) =  7

rm(list = ls())
genetable<-matrix(c(0,0,0,1/32,1/32,3/32,3/32,1/16, 1/16,1/16,1/8,2/8,0,1/8,1/64,1/64,1/64,1/64,0,0,0), nrow = 3, byrow = T )
print("Joint Probability Matrix")

## [1] "Joint Probability Matrix"

genetable

##          [,1]     [,2]     [,3]     [,4]    [,5]    [,6]    [,7]
## [1,] 0.000000 0.000000 0.000000 0.031250 0.03125 0.09375 0.09375
## [2,] 0.062500 0.062500 0.062500 0.125000 0.25000 0.00000 0.12500
## [3,] 0.015625 0.015625 0.015625 0.015625 0.00000 0.00000 0.00000

rownames(genetable)<-c("0","1","2")
colnames(genetable)<-c("0","1","2","3","4","5","6")
print("Joint Probability Matrix with Headings")

## [1] "Joint Probability Matrix with Headings"

genetable

##          0        1        2        3       4       5       6
## 0 0.000000 0.000000 0.000000 0.031250 0.03125 0.09375 0.09375
## 1 0.062500 0.062500 0.062500 0.125000 0.25000 0.00000 0.12500
## 2 0.015625 0.015625 0.015625 0.015625 0.00000 0.00000 0.00000

#Checking whether sum of all probabilities is 1 or not
sum(genetable)

## [1] 1

py1<-sum(genetable[1,])
py2<-sum(genetable[2,])
py3<-sum(genetable[3,])
print("Marginal Probabilities P[ Y = y]")

## [1] "Marginal Probabilities P[ Y = y]"

cat("P( Y = 0) = ", py1, fill = T )

## P( Y = 0) =  0.25

cat("P( Y = 1) = ", py2, fill = T )

## P( Y = 1) =  0.6875

cat("P( Y = 2) = ", py3, fill = T )

## P( Y = 2) =  0.0625

px1<-sum(genetable[,1])
px2<-sum(genetable[,2])
px3<-sum(genetable[,3])
px4<-sum(genetable[,4])
px5<-sum(genetable[,5])
px6<-sum(genetable[,6])
px7<-sum(genetable[,7])
print("Marginal Probabilities P[X = x]")

## [1] "Marginal Probabilities P[X = x]"

cat("P(X = 0) = ", px1, fill = T )

## P(X = 0) =  0.078125

cat("P(X = 1) = ", px2, fill = T )

## P(X = 1) =  0.078125

cat("P(X = 2) = ", px3, fill = T )

## P(X = 2) =  0.078125

cat("P(X = 3) = ", px4, fill = T )

## P(X = 3) =  0.171875

cat("P(X = 4) = ", px5, fill = T )

## P(X = 4) =  0.28125

cat("P(X = 5) = ", px6, fill = T )

## P(X = 5) =  0.09375

cat("P(X = 6) = ", px7, fill = T )

## P(X = 6) =  0.21875

#converting P[ X = x] in a matrix form
mpx<-matrix(c(px1,px2,px3,px4,px5,px6,px7),nrow = 1)
#Combining P[ X = x, Y = y] and P[X = x]
ppx<-rbind(genetable,mpx)
#Converting P[ Y = y] in a matrix form with last row as sum of P[X = x]
mpy<-matrix(c(py1,py2,py3,1), nrow = 4, byrow = T )
#Combining P[X = x, Y = y], P[X = x] and P[ Y = y]
ppxy<-cbind(ppx,mpy)
#Inserting row and column headings
rownames(ppxy)<-c("0","1","2","P[X = x}")
colnames(ppxy)<-c("0","1","2","3","4","5","6","P[Y = y]")
print("Joint Probability Matrix with marginal Probabilities")

## [1] "Joint Probability Matrix with marginal Probabilities"

ppxy

##                 0        1        2        3       4       5       6
## 0        0.000000 0.000000 0.000000 0.031250 0.03125 0.09375 0.09375
## 1        0.062500 0.062500 0.062500 0.125000 0.25000 0.00000 0.12500
## 2        0.015625 0.015625 0.015625 0.015625 0.00000 0.00000 0.00000
## P[X = x} 0.078125 0.078125 0.078125 0.171875 0.28125 0.09375 0.21875
##          P[Y = y]
## 0          0.2500
## 1          0.6875
## 2          0.0625
## P[X = x}   1.0000

#pl2y2 = P[x< = 2,y = 2]
# pr1 = probabilities
pr1<-c(ppxy[3,1],ppxy[3,2],ppxy[3,3])
cat("P(X = 0, Y = 2) = ", ppxy[3,1], fill = T )

## P(X = 0, Y = 2) =  0.015625

cat("P(X = 1, Y = 2) = ", ppxy[3,2], fill = T )

## P(X = 1, Y = 2) =  0.015625

cat("P(X = 2, Y = 2) = ", ppxy[3,3], fill = T )

## P(X = 2, Y = 2) =  0.015625

pl2y2<-sum(pr1)
cat("P( X< = 2, Y = 2) = ", pl2y2, fill = T )

## P( X< = 2, Y = 2) =  0.046875

#pxle3 = P[X< = 3]
#pr2 = probabilities
pr2<-c(mpx[1],mpx[2],mpx[3],mpx[4] )
pxle3<- sum(pr2)
cat("P[X< = 3] = ", pxle3, fill = T )

## P[X< = 3] =  0.40625

#pr3 = P[Y = 2]
pr3<-py3
cat("P[Y = 3] = ", pr3, fill = T )

## P[Y = 3] =  0.0625

rownames(mpy)<-c("P[Y = 0]","P[Y = 1]","P[Y = 2","Sum P[Y = y]")
print("Marginal Distribution of Y, P[Y = y], and Sum of P[Y = y]")

## [1] "Marginal Distribution of Y, P[Y = y], and Sum of P[Y = y]"

mpy

##                [,1]
## P[Y = 0]     0.2500
## P[Y = 1]     0.6875
## P[Y = 2      0.0625
## Sum P[Y = y] 1.0000

colnames(mpx)<-c("P[X = 0]","P[X = 1]","P[X = 2]","P[X = 3]","P[X = 4]","P[X = 5]","P[X = 6]")
print("Marginal Distribution of X, P[X = x]")

## [1] "Marginal Distribution of X, P[X = x]"

mpx

##      P[X = 0] P[X = 1] P[X = 2] P[X = 3] P[X = 4] P[X = 5] P[X = 6]
## [1,] 0.078125 0.078125 0.078125 0.171875  0.28125  0.09375  0.21875

#px1gy2 = P[X = 1|Y = 2)
px1gy2<-genetable[3,2]/py3
cat("P[X = 1|Y = 2] = ",px1gy2,fill = T )

## P[X = 1|Y = 2] =  0.25

#checking whether f(x,y) is a probability function
Pxy<-integrate (function(y){
sapply(y,function(y){
integrate(function(x){
sapply(x,function(x)(4*x*y))
}, 0, 1)$value
})
}, 0,1)
print("Value of int int f(x,y)dxdy")

## [1] "Value of int int f(x,y)dxdy"

Pxy

## 1 with absolute error < 1.1e-14

# Assume x to be a constant
# Px1 = f(x)
Px1<-integrate(function(y){
4*y
}, 0, 1)$value
Px1

## [1] 2

# the pdf f(x) = x*Px1
# Assume y to be a constant
# Py1 = f(y)
Py1<-integrate(function(x){
4*x
}, 0, 1)$value
Py1

## [1] 2

# the pdf f(y) = y*Py1

#Regression and Correlation
rm(list = ls())
x<-c(6,9,6,7,8,7,7,7,9,9)
y<-c(8,6,6,7,8,7,7,9,9,9)
# show values of X and Y
x

##  [1] 6 9 6 7 8 7 7 7 9 9

##  [1] 8 6 6 7 8 7 7 9 9 9

#Scatter Plot between X and Y
plot(x,y)

linreg = lm(x ~ y)
linreg

## 
## Call:
## lm(formula = x ~ y)
## 
## Coefficients:
## (Intercept)            y  
##      5.0484       0.3226

plot(linreg)

# fitted line on the scatter plot
abline(5.0484,0.3226)

# Confidence intervals and tests
summary(linreg)

## 
## Call:
## lm(formula = x ~ y)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6290 -0.7903 -0.3064  0.8790  2.0161 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   5.0484     2.5821   1.955   0.0863 .
## y             0.3226     0.3362   0.960   0.3653  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.184 on 8 degrees of freedom
## Multiple R-squared:  0.1032, Adjusted R-squared:  -0.008871 
## F-statistic: 0.9209 on 1 and 8 DF,  p-value: 0.3653

#Estimated value for given x = 11
x1<-11
yest<-5.0484 + 0.3226*x1
#print estimated value and x = 11
x1

## [1] 11

yest

## [1] 8.597

#Predicting all values of Y for X values
pred = predict(linreg)
pred

##        1        2        3        4        5        6        7        8 
## 7.629032 6.983871 6.983871 7.306452 7.629032 7.306452 7.306452 7.951613 
##        9       10 
## 7.951613 7.951613

#Correlation
rm(list = ls())
x<-c(0.512,0.614,0.621,0.519,0.525,0.617,0.775,0.555,0.674,0.677)
y<-c(0.423,0.523,0.444,0.421,0.456,0.545,0.672,0.423,0.567,0.576)
# show values of X and Y
x

##  [1] 0.512 0.614 0.621 0.519 0.525 0.617 0.775 0.555 0.674 0.677

##  [1] 0.423 0.523 0.444 0.421 0.456 0.545 0.672 0.423 0.567 0.576

#Scatter Plot between X and Y
plot(x,y)

#Correlation between X and Y
cor(x,y)

## [1] 0.9319725

#Binomial Distribution
rm(list = ls( ))
#Part A, n = 10, p = 0.22, x = 3
n<-10
p<-0.22
x<-3
dbinom(x,n,p)

## [1] 0.2244458

#Part B: n = 10, p = 0.22, x = at least 4
x1<-3
p1<-1-pbinom(3,n,p)
p1

## [1] 0.1586739

#Poisson Distribution
rm(list = ls( ))
#Part A, lambda1 = 1.5, x = 0
lambda1<-1.5
x<-0
dpois(x,lambda1)

## [1] 0.2231302

#Part B, x = 2
x<-2
dpois(x,lambda1)

## [1] 0.2510214

#Negative Binomial Distribution
# x = number of failures prior to getting r successes
# r = number of successes needed
# p = probability of successes in each trial
x<-4
r<-2
p<-0.05
p1<-1-pnbinom(x,r,p)
p1

## [1] 0.9672262

#Geometric Distribution
# x = number of failures prior to getting first success
# p = probability of successes in each trial
p<-0.10
#Part A, x = 2
x<-2
dgeom(x,p)

## [1] 0.081

#Part B
ex<-1/0.10
ex

## [1] 10

p1<-1-pnbinom(x,r,p)
p1

## [1] 0.9477

#Hypergeometric Distribution
# x = number of expressed genes needed from the
# sample which is taken for an in-depth analysis
# n = sample size taken for in-depth analysis
# N = Total number of genes under consideration
# M1 = Number of unexpressed genes among N genes
# M = Number of expressed genes among N genes
x<-2
n<-6
M1<-40
M<-80
dhyper(x,M,M1,n)

## [1] 0.07906174

#Multinomial Distribution
# x1 = vector of genes having mutation due to ionizing radiations,
# genes having mutation due to nonionizing radiations,
# genes having mutation due to chemicals
# p1 = vector of corresponding probabilities for x1
x1<-c(2,2,3)
p1<-c(2/9,1/5,26/45)
dmultinom(x = x1, prob = p1)

## [1] 0.08000862

#Multinomial Distribution
# x1 = vector of genes having mutation due to ionizing radiations,
# genes having mutation due to nonionizing radiations,
# genes having mutation due to chemicals
# p1 = vector of corresponding probabilities for x1
x1<-c(3,2,1)
p1<-c(1/4,1/2,1/4)
dmultinom(x = x1, prob = p1)

## [1] 0.05859375

#Uniform Distribution
# min = lower limit of replication time
# max = upper limit of replication time
# lower = minimum wait time for the scientist
# upper = maximum wait time for the scientist
min<-0
max<-30
lower<-20
upper<-30
p1<- punif(upper,min,max) - punif(lower,min,max)
p1

## [1] 0.3333333

#Normal Distribution
# mean = mean of the normal distribution
# sd = standard deviation of the normal distribution
# x = number of mutations among progeny males
mean<-54
sd<-5
#Part A
#p1 = probability that number of mutations
# among progeny of males will be more than 65
x<-65
p1<-1-pnorm(x, mean, sd)
p1

## [1] 0.01390345

# Part B
#(x1, x2): number of mutations among progeny of males
x1<-47
x2<-62
#p2 = probability that number of mutations
# among progeny of males will be between (x1, x2)
p2<-pnorm(x2, mean, sd)- pnorm(x1, mean, sd)
p2

## [1] 0.864444

#Normal Distribution
# mean = mean of the normal distribution
# sd = standard deviation of the normal distribution
# x = gene expression level
mean<-540
sd<-25
#p1 = probability that gene is expressed
#(x1, x2): gene expression level
x1<-500
x2<-600
p1<-pnorm(x2, mean, sd)-pnorm(x1, mean, sd)
p1

## [1] 0.9370032

#Normal Distribution
# mean = mean of the normal distribution
# sd = standard deviation of the normal distribution
# x = mean intensity of a spot
mean<-750
sd<-50
#Part A
#p1 = probability that the mean intensity of a spot
#is less than x
x<-825
p1<-pnorm(x, mean, sd)
p1

## [1] 0.9331928

#p2 = probability that the mean intensity of a spot is greater than x
x<-650
p2<-1- pnorm(x, mean, sd)
p2

## [1] 0.9772499

library(TeachingDemos)
x<-c(95, 90, 99, 98, 88, 86, 92, 95, 97, 99, 99, 98, 95, 87, 88, 89, 91, 92, 99, 91, 92, 87, 86, 88, 87, 87, 88, 87, 97, 98)
stdevx<-4.75
z.test(x,mu = mean(x),stdevx,conf.level = 0.95)

## 
##  One Sample z-test
## 
## data:  x
## z = 0, n = 30.00000, Std. Dev. = 4.75000, Std. Dev. of the sample
## mean = 0.86723, p-value = 1
## alternative hypothesis: true mean is not equal to 92.16667
## 95 percent confidence interval:
##  90.46693 93.86640
## sample estimates:
## mean of x 
##  92.16667

x<-c(75, 85, 95, 105, 115)
t.test(x,conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  x
## t = 13.435, df = 4, p-value = 0.0001776
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   75.36757 114.63243
## sample estimates:
## mean of x 
##        95

two.z.test = function(m,n,xbar, ybar, sdx, sdy,conf.level) {
  alpha = 1-conf.level 
  zstar = qnorm(1-alpha/2)
  sdp = sqrt((((sdx)^2)/m) + (((sdy)^2)/n)) 
  zstar 
  xbar - ybar + c(-zstar*sdp,zstar*sdp) 
  }
x<-c(80, 50, 10, 50, 100, 80, 80, 0, 60, 100, 90, 0, 10)
y<-c(80, 50, 5, 70, 80, 80, 50, 0, 80, 10, 70, 0, 5)
xbar<-mean(x)
ybar<-mean(y)
sdx<-37
sdy<-34
m<-length(x)
n<-length(y)
two.z.test(m,n,xbar, ybar,sdx, sdy, 0.90)

## [1] -12.92378  32.92378

x<-c(12, 14, 16, 18, 14, 9, 16, 13)
y<-c(11, 11, 12, 13, 13, 11, 13, 11, 12, 13)
x

## [1] 12 14 16 18 14  9 16 13

##  [1] 11 11 12 13 13 11 13 11 12 13

t.test(x,y, conf.level = 0.95, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  x and y
## t = 2.1419, df = 16, p-value = 0.04793
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02055439 3.97944561
## sample estimates:
## mean of x mean of y 
##        14        12

x<-c(1.3,1.4,3.8,0.9,0.6,1.7,2.5,2.1,0.7)
int.chi = function(x,alpha = 0.10){
n = length(x)
s2 = var(x,na.rm = T)
lw = s2*(n-1)/qchisq(1-alpha/2,n-1)
up = s2*(n-1)/qchisq(alpha/2,n-1)
c(lw, up)
}
int.chi(x)

## [1] 0.5352313 3.0373594

sam.size = function(varx,eps,alpha){
n = (qnorm(alpha/2)^2)*varx/(eps^2)
c(n)
}
sam.size(1.35,1.25,0.01)

## [1] 5.732551

#Based on normal distribution
#n = sample size
#conf.level = confidence level
#mu = mean under null hypothesis
#sdx = standard deviation
#delta = difference between true mean and mu
power.z.test = function(n,mu,sdx,delta,conf.level){
alpha = 1 - conf.level
zstar = qnorm(1 - alpha/2)
error = zstar*sdx/sqrt(n)
left<-mu-error
right<-mu + error
assumed<- mu + delta
Zleft<-(left-assumed)/(sdx/sqrt(n))
Zright<-(right-assumed)/(sdx/sqrt(n))
p<-pnorm(Zright)-pnorm(Zleft)
power = 1-p
c(power)
}
power.z.test(25,75.5,15,2.5,0.95)

## [1] 0.132558

#Based on t-test
#Based on one-sample t-test
power.t.test(n = 25,delta = 2.5,sd = 15,sig.level = 0.05, type = "one.sample",alternative = "two.sided",strict = TRUE)

## 
##      One-sample t test power calculation 
## 
##               n = 25
##           delta = 2.5
##              sd = 15
##       sig.level = 0.05
##           power = 0.1259956
##     alternative = two.sided

#Based on normal distribution
#n = sample size
#mu = mean under null hypothesis
#xbar = sample mean
#varx = population variance
#conf.level = confidence level
z.test.onesample = function(n,mu, xbar, varx, alternative = "two sided", conf.level) {
alpha = 1-conf.level
p<-2*pnorm(xbar, mu, sqrt(varx/n))
print ("Two-sided alternative")
cat("p-value = ",p,fill = T)
cat("Alpha = ",alpha, fill = T)
if(p < alpha)"Null hypothesis is rejected"
else ("Decision: Since p > alpha, failed to reject Null hypothesis")
}
z.test.onesample(34,0.28,0.20,0.14, alternative = "two sided", 0.95)

## [1] "Two-sided alternative"
## p-value =  0.2125039
## Alpha =  0.05

## [1] "Decision: Since p > alpha, failed to reject Null hypothesis"

#n = sample size
#mu = mean under null hypothesis
#xbar = sample mean
#sdx = sample standard deviation
#conf.level = confidence level
t.test.onesample = function(n,mu, xbar, sdx, alternative = "two sided",
conf.level){
alpha = 1-conf.level
t<-(xbar-mu)*sqrt(n)/sdx
cat("t-test statistic value = ",t,fill = T)
df<-n-1
p<-2*(1-pt(abs(t), df))
print("Two-sided alternative")
cat("p-value = ",p,fill = T)
cat("Alpha =",alpha,fill = T)
if(p<alpha)"Null hypothesis is rejected"
else("Decision: Since p > alpha,failed to reject Null hypothesis")
}
t.test.onesample(16,300, 310, 15, alternative = "two sided", 0.95)

## t-test statistic value =  2.666667
## [1] "Two-sided alternative"
## p-value =  0.01759515
## Alpha = 0.05

## [1] "Null hypothesis is rejected"

# using a beta prior
library(LearnBayes)

## 
## Attaching package: 'LearnBayes'

## The following object is masked from 'package:TeachingDemos':
## 
##     triplot

q2 = list(p = .9,x = .5)
q1 = list(p = .4,x = .3)
c = beta.select(q1,q2)
alpha1 = c[1]
alpha2 = c[2]
s1 = 10
f1 = 15
curve(dbeta(x,alpha1 + s1,alpha2 + f1), from = 0, to = 1,
xlab = "probability",ylab = "Density",lty = 1,lwd = 4)
curve(dbeta(x,s1 + 1,f1 + 1),add = TRUE,lty = 2,lwd = 4)
curve(dbeta(x,alpha1,alpha2),add = TRUE,lty = 3,lwd = 4)
legend(.9,4,c("Prior","Likelihood","Posterior"),
lty = c(3,2,1),lwd = c(3,3,3))

posterior1 = rbeta(2000, alpha1 + s1, alpha2 + f1)
hist(posterior1,xlab = "probability")

quantile(posterior1, c(0.05, 0.95))

##        5%       95% 
## 0.2601879 0.5052479

#Gibbs sampling
# bivariate normal
gsampler<-function (n, rho1)
{ mat <- matrix(ncol = 2, nrow = n)
x <- 0
y <- 0
mat[1, ] <- c(x, y)
for (i in 2:n) {
z<-sqrt(1-rho1^2)
x <- rnorm(1, rho1 * y, z)
y <- rnorm(1, rho1 * x, z)
mat[i, ] <- c(x, y)
}
mat
}
bivariate1<-gsampler(20000,0.95)
par(mfrow = c(3,2))
plot(bivariate1,col = 1:20000)
plot(bivariate1,type = "l")
plot(ts(bivariate1[,1]))
plot(ts(bivariate1[,2]))
hist(bivariate1[,1],50)
hist(bivariate1[,2],50)

par(mfrow = c(1,1))

ttable<-matrix(c(0.1, .2, .1, .6, .2, .1, .3, .4, .3, .3, .2, .2, .1, .2, .3, .4),nrow = 4,byrow = T)
print("First order Markov Chain Probability matrix")

## [1] "First order Markov Chain Probability matrix"

rownames(ttable)<- c("a", "g", "c","t")
colnames(ttable)<- c("a", "g", "c", "t")
print("Transition Matrix")

## [1] "Transition Matrix"

ttable

##     a   g   c   t
## a 0.1 0.2 0.1 0.6
## g 0.2 0.1 0.3 0.4
## c 0.3 0.3 0.2 0.2
## t 0.1 0.2 0.3 0.4

pc<-0.2
px<-ttable[1,4]*ttable[2,1]*ttable[2,2]*ttable[3,1]*pc
cat("Probability of finding a DNA sequence CGGAT =", px, fill = T)

## Probability of finding a DNA sequence CGGAT = 0.00072

ttable1<-matrix(c(0.174, .28, .415, .131, .169, .355, .269, .207, .162, .325, .372, .141, .081, .345, .373, .201),nrow = 4,byrow = T)
print("First order Markov Chain Probability matrix-Model-1")

## [1] "First order Markov Chain Probability matrix-Model-1"

rownames(ttable1)<-c("a", "g", "c", "t")
colnames(ttable1)<-c("a", "g", "c", "t")
print("Transition Matrix-Model 1")

## [1] "Transition Matrix-Model 1"

ttable1

##       a     g     c     t
## a 0.174 0.280 0.415 0.131
## g 0.169 0.355 0.269 0.207
## c 0.162 0.325 0.372 0.141
## t 0.081 0.345 0.373 0.201

ttable2<-matrix(c(0.301, .210, .275, .214, .321, .295, .076, .308, .242, .243, .302, .213, .175, .245, .295, .285), nrow = 4,byrow = T)
print("First order Markov Chain Probability matrix-Model-2")

## [1] "First order Markov Chain Probability matrix-Model-2"

rownames(ttable2)<-c("a", "g", "c", "t")
colnames(ttable2)<-c("a", "g", "c", "t")
print("Transition Matrix-Model 2")

## [1] "Transition Matrix-Model 2"

ttable2

##       a     g     c     t
## a 0.301 0.210 0.275 0.214
## g 0.321 0.295 0.076 0.308
## c 0.242 0.243 0.302 0.213
## t 0.175 0.245 0.295 0.285

t11<-ttable1[1,1]
t21<-ttable2[1,1]
r1c1<-(log2(t11/t21))
t12<-ttable1[1,2]
t22<-ttable2[1,2]
r1c2<-(log2(t12/t22))
t13<-ttable1[1,3]
t23<-ttable2[1,3]
r1c3<-(log2(t13/t23))
t14<-ttable1[1,4]
t24<-ttable2[1,4]
r1c4<-(log2(t14/t24))
t11<-ttable1[2,1]
t21<-ttable2[2,1]
r2c1<-(log2(t11/t21))
t12<-ttable1[2,2]
t22<-ttable2[2,2]
r2c2<-(log2(t12/t22))
t13<-ttable1[2,3]
t23<-ttable2[2,3]
r2c3<-(log2(t13/t23))
t14<-ttable1[2,4]
t24<-ttable2[2,4]
r2c4<-(log2(t14/t24))
t11<-ttable1[3,1]
t21<-ttable2[3,1]
r3c1<-(log2(t11/t21))
t12<-ttable1[3,2]
t22<-ttable2[3,2]
r3c2<-(log2(t12/t22))
t13<-ttable1[3,3]
t23<-ttable2[3,3]
r3c3<-(log2(t13/t23))
t14<-ttable1[3,4]
t24<-ttable2[3,4]
r3c4<-(log2(t14/t24))
t11<-ttable1[4,1]
t21<-ttable2[4,1]
r4c1<-(log2(t11/t21))
t12<-ttable1[4,2]
t22<-ttable2[4,2]
r4c2<-(log2(t12/t22))
t13<-ttable1[4,3]
t23<-ttable2[4,3]
r4c3<-(log2(t13/t23))
t14<-ttable1[4,4]
t24<-ttable2[4,4]
r4c4<-(log2(t14/t24))
ttable3<-matrix(c(r1c1, r1c2, r1c3, r1c4,
r2c1, r2c2, r2c3, r2c4,
r3c1, r3c2, r3c3, r3c4,
r4c1, r4c2, r4c3, r4c4),
nrow = 4, byrow = T)
print("log-odd ratio Matrix")

## [1] "log-odd ratio Matrix"

rownames(ttable2)<-c("a", "g", "c", "t")
colnames(ttable2)<-c("a", "g", "c", "t")
print("Log-odd ratio matrix")

## [1] "Log-odd ratio matrix"

ttable3

##            [,1]      [,2]      [,3]       [,4]
## [1,] -0.7906762 0.4150375 0.5936797 -0.7080440
## [2,] -0.9255501 0.2671041 1.8235348 -0.5732996
## [3,] -0.5790132 0.4194834 0.3007541 -0.5951583
## [4,] -1.1113611 0.4938146 0.3384607 -0.5037664

max<-max(ttable3)
cat("Log-odd ratio of the island CpG is = ", max, fill = T)

## Log-odd ratio of the island CpG is =  1.823535

library(Biodem)
p1<-matrix(c(1/4, 1/4, 1/2,
1/4, 1/2, 1/4,
0, 2/3, 1/3), nrow = 3, byrow = T)
print("Transition Matrix-P")

## [1] "Transition Matrix-P"

p1

##      [,1]      [,2]      [,3]
## [1,] 0.25 0.2500000 0.5000000
## [2,] 0.25 0.5000000 0.2500000
## [3,] 0.00 0.6666667 0.3333333

pi01<-matrix(c(0, 0, 1), nrow = 1,byrow = T)
print("Starting Probability of the Chain")

## [1] "Starting Probability of the Chain"

pi01

##      [,1] [,2] [,3]
## [1,]    0    0    1

p4<-mtx.exp(p1, 4)
print("4th power of Matrix P")

## [1] "4th power of Matrix P"

p4

##           [,1]      [,2]      [,3]
## [1,] 0.1723090 0.5114294 0.3162616
## [2,] 0.1688368 0.5124421 0.3187211
## [3,] 0.1712963 0.5073302 0.3213735

pi1<-pi01 %*% p4
print("Probability PI(1)")

## [1] "Probability PI(1)"

pi1

##           [,1]      [,2]      [,3]
## [1,] 0.1712963 0.5073302 0.3213735

pi2<-pi1 %*% p4
print("Probability PI'(2)")

## [1] "Probability PI'(2)"

pi2

##          [,1]      [,2]      [,3]
## [1,] 0.170222 0.5106258 0.3191522

pi3<-pi2 %*% p4
print("Probability PI'(3)")

## [1] "Probability PI'(3)"

pi3

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi4<-pi3 %*% p4
print("Probability PI'(4)")

## [1] "Probability PI'(4)"

pi4

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi5<-pi4 %*% p4
print("Probability PI'(5)")

## [1] "Probability PI'(5)"

pi5

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi6<-pi5 %*% p4
print("Probability PI'(6)")

## [1] "Probability PI'(6)"

pi6

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi7<-pi6 %*% p4
print("Probability PI'(7)")

## [1] "Probability PI'(7)"

pi7

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi8<-pi7 %*% p4
print("Probability PI'(8)")

## [1] "Probability PI'(8)"

pi8

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi9<-pi8 %*% p4
print("Probability PI'(9)")

## [1] "Probability PI'(9)"

pi9

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

pi10<-pi9 %*% p4
print("Probability PI'(10)")

## [1] "Probability PI'(10)"

pi10

##           [,1]      [,2]      [,3]
## [1,] 0.1702128 0.5106383 0.3191489

library(Biodem)
p1<-matrix(c(0.3, 0.4, 0.3, 0.3, 0.3, 0.4, 0.4, 0.3, 0.3), nrow = 3, byrow = T)
print("Transition Matrix-P")

## [1] "Transition Matrix-P"

p1

##      [,1] [,2] [,3]
## [1,]  0.3  0.4  0.3
## [2,]  0.3  0.3  0.4
## [3,]  0.4  0.3  0.3

pn<-mtx.exp(p1, 100)
print("nth power Transition matrix")

## [1] "nth power Transition matrix"

pn

##           [,1]      [,2]      [,3]
## [1,] 0.3333333 0.3333333 0.3333333
## [2,] 0.3333333 0.3333333 0.3333333
## [3,] 0.3333333 0.3333333 0.3333333

mrna <- data.frame(expr = c(95, 98, 100, 105, 85, 88,94, 92, 78, 88, 92, 91, 72,
88, 82, 73, 75, 77),
samples = factor (c(rep ("mRNA1",6), rep("mRNA2", 6), rep("mRNA3", 6))))
mrna

##    expr samples
## 1    95   mRNA1
## 2    98   mRNA1
## 3   100   mRNA1
## 4   105   mRNA1
## 5    85   mRNA1
## 6    88   mRNA1
## 7    94   mRNA2
## 8    92   mRNA2
## 9    78   mRNA2
## 10   88   mRNA2
## 11   92   mRNA2
## 12   91   mRNA2
## 13   72   mRNA3
## 14   88   mRNA3
## 15   82   mRNA3
## 16   73   mRNA3
## 17   75   mRNA3
## 18   77   mRNA3

# mean, variance and standard deviation
sapply(split(mrna$expr,mrna$samples),mean)

##    mRNA1    mRNA2    mRNA3 
## 95.16667 89.16667 77.83333

sapply(split(mrna$expr,mrna$samples),var)

##    mRNA1    mRNA2    mRNA3 
## 56.56667 33.76667 37.36667

sqrt(sapply(split(mrna$expr,mrna$samples), var))

##    mRNA1    mRNA2    mRNA3 
## 7.521081 5.810909 6.112828

# Box-Plot
boxplot(split(mrna$expr,mrna$samples), xlab = "Samples",ylab = "Expression
          Levels", col = "red")

fitmrna<- lm(expr~samples, data = mrna)
summary(fitmrna)

## 
## Call:
## lm(formula = expr ~ samples, data = mrna)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1667  -4.3333   0.8333   3.8333  10.1667 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    95.167      2.664  35.729 6.25e-16 ***
## samplesmRNA2   -6.000      3.767  -1.593 0.132042    
## samplesmRNA3  -17.333      3.767  -4.602 0.000346 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.524 on 15 degrees of freedom
## Multiple R-squared:  0.5929, Adjusted R-squared:  0.5386 
## F-statistic: 10.92 on 2 and 15 DF,  p-value: 0.001183

anova(fitmrna)

## Analysis of Variance Table
## 
## Response: expr
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## samples    2 929.78  464.89  10.921 0.001183 **
## Residuals 15 638.50   42.57                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

genetherapy <- data.frame(expr = c(96, 99, 100, 104, 84, 93, 90, 75, 80, 90, 70, 90, 84, 76, 78, 78, 87, 67, 66, 76),
subjects = factor(c(rep("TreatmentA",5), rep("TreatmentB", 5), 
                    rep("TreatmentC", 5), rep("TreatmentD", 5))))
genetherapy

##    expr   subjects
## 1    96 TreatmentA
## 2    99 TreatmentA
## 3   100 TreatmentA
## 4   104 TreatmentA
## 5    84 TreatmentA
## 6    93 TreatmentB
## 7    90 TreatmentB
## 8    75 TreatmentB
## 9    80 TreatmentB
## 10   90 TreatmentB
## 11   70 TreatmentC
## 12   90 TreatmentC
## 13   84 TreatmentC
## 14   76 TreatmentC
## 15   78 TreatmentC
## 16   78 TreatmentD
## 17   87 TreatmentD
## 18   67 TreatmentD
## 19   66 TreatmentD
## 20   76 TreatmentD

sapply(split(genetherapy$expr,genetherapy$subjects),mean)

## TreatmentA TreatmentB TreatmentC TreatmentD 
##       96.6       85.6       79.6       74.8

sapply(split(genetherapy$expr,genetherapy$subjects),var)

## TreatmentA TreatmentB TreatmentC TreatmentD 
##       57.8       59.3       58.8       74.7

sapply(split(genetherapy$expr,genetherapy$subjects),sd)

## TreatmentA TreatmentB TreatmentC TreatmentD 
##   7.602631   7.700649   7.668116   8.642916

boxplot(split(genetherapy$expr,genetherapy$subjects),xlab = "Subjects",ylab
= "Expression Levels",col = "red")

fitgene<- lm(expr~subjects, data = genetherapy)
summary(fitgene)

## 
## Call:
## lm(formula = expr ~ subjects, data = genetherapy)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -12.60  -6.15   1.80   4.40  12.20 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          96.600      3.540  27.290 7.59e-15 ***
## subjectsTreatmentB  -11.000      5.006  -2.197 0.043066 *  
## subjectsTreatmentC  -17.000      5.006  -3.396 0.003692 ** 
## subjectsTreatmentD  -21.800      5.006  -4.355 0.000491 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.915 on 16 degrees of freedom
## Multiple R-squared:  0.5695, Adjusted R-squared:  0.4888 
## F-statistic: 7.056 on 3 and 16 DF,  p-value: 0.003092

anova(fitgene)

## Analysis of Variance Table
## 
## Response: expr
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## subjects   3 1326.2  442.05  7.0559 0.003092 **
## Residuals 16 1002.4   62.65                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tissue type "Abdomen" is represented by 1
# Tissue type "Head" is represented by 2
effect <- data.frame(mlevel = c(0.21, 0.35, 0.65, 0.97, 1.25, 1.01, 0.15, 0.20, 0.75, 1.10, 0.9, 0.95), 
tissuetype = factor(rep(rep(1:2, rep(6,2)),1)) ,
BXCposition = factor(rep(c("1","2","3","4","5","6"), c(1,1,1,1,1,1))))
effect

##    mlevel tissuetype BXCposition
## 1    0.21          1           1
## 2    0.35          1           2
## 3    0.65          1           3
## 4    0.97          1           4
## 5    1.25          1           5
## 6    1.01          1           6
## 7    0.15          2           1
## 8    0.20          2           2
## 9    0.75          2           3
## 10   1.10          2           4
## 11   0.90          2           5
## 12   0.95          2           6

sapply(split(effect$mlevel,effect$tissuetype), mean)

##     1     2 
## 0.740 0.675

sapply(split(effect$mlevel,effect$BXCposition), mean)

##     1     2     3     4     5     6 
## 0.180 0.275 0.700 1.035 1.075 0.980

sapply(split(effect$mlevel,effect$tissuetype), var)

##       1       2 
## 0.16540 0.16275

sapply(split(effect$mlevel,effect$BXCposition), var)

##       1       2       3       4       5       6 
## 0.00180 0.01125 0.00500 0.00845 0.06125 0.00180

sapply(split(effect$mlevel,effect$tissuetype), sd)

##         1         2 
## 0.4066940 0.4034229

sapply(split(effect$mlevel,effect$BXCposition), sd)

##          1          2          3          4          5          6 
## 0.04242641 0.10606602 0.07071068 0.09192388 0.24748737 0.04242641

boxplot(split(effect$mlevel,effect$tissuetype),xlab = "Tissue type",ylab = "Methylation Level",col = "blue")

boxplot(split(effect$mlevel,effect$BXCposition),xlab = "BX-C position",ylab =
"Methylation level",col = "red")

fitexp<-lm(mlevel~tissuetype + BXCposition, data = effect)
fitexp

## 
## Call:
## lm(formula = mlevel ~ tissuetype + BXCposition, data = effect)
## 
## Coefficients:
##  (Intercept)   tissuetype2  BXCposition2  BXCposition3  BXCposition4  
##       0.2125       -0.0650        0.0950        0.5200        0.8550  
## BXCposition5  BXCposition6  
##       0.8950        0.8000

anova(fitexp)

## Analysis of Variance Table
## 
## Response: mlevel
##             Df  Sum Sq  Mean Sq F value   Pr(>F)   
## tissuetype   1 0.01268 0.012675  0.8244 0.405534   
## BXCposition  5 1.56388 0.312775 20.3431 0.002453 **
## Residuals    5 0.07688 0.015375                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tissue type "Type-1" is represented by 1, "Type-2" by 2, and so on
# Time of Exposure "Time-1" is represented by 1, "Time-2" is represented by 2, and so on
# Dose level is represented by "A", "B", "C", "D"
effect <- data.frame(exp = c(90, 94, 86, 78, 82, 76, 94, 90, 65, 98, 95, 81, 91, 85, 71, 99), 
                     tissuetype = factor(rep(rep(1:4, rep(4,4)),1)),
                     time = factor(rep(c("Time-1","Time-2","Time-3","Time-4"), c(1,1,1,1))), 
dose = factor(rep(c("D","B","C","A", 
                    "C", "A", "D", "B", 
                    "A", "D", "B", "C", 
                    "B", "C", "A", "D"), 
                  c(1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1))))
effect

##    exp tissuetype   time dose
## 1   90          1 Time-1    D
## 2   94          1 Time-2    B
## 3   86          1 Time-3    C
## 4   78          1 Time-4    A
## 5   82          2 Time-1    C
## 6   76          2 Time-2    A
## 7   94          2 Time-3    D
## 8   90          2 Time-4    B
## 9   65          3 Time-1    A
## 10  98          3 Time-2    D
## 11  95          3 Time-3    B
## 12  81          3 Time-4    C
## 13  91          4 Time-1    B
## 14  85          4 Time-2    C
## 15  71          4 Time-3    A
## 16  99          4 Time-4    D

sapply(split(effect$exp,effect$tissuetype), mean)

##     1     2     3     4 
## 87.00 85.50 84.75 86.50

sapply(split(effect$exp,effect$time), mean)

## Time-1 Time-2 Time-3 Time-4 
##  82.00  88.25  86.50  87.00

sapply(split(effect$exp,effect$dose), mean)

##     A     B     C     D 
## 72.50 92.50 83.50 95.25

sapply(split(effect$exp,effect$tissuetype), var)

##         1         2         3         4 
##  46.66667  65.00000 228.25000 139.66667

sapply(split(effect$exp,effect$time), var)

##   Time-1   Time-2   Time-3   Time-4 
## 144.6667  96.2500 123.0000  90.0000

sapply(split(effect$exp,effect$dose), var)

##         A         B         C         D 
## 33.666667  5.666667  5.666667 16.916667

boxplot(split(effect$exp,effect$tissuetype),xlab = "Tissue type",ylab = "Expression Level",col = "blue")

boxplot(split(effect$exp,effect$time),xlab = "Time",ylab = "Expression Level",col = "red")

boxplot(split(effect$exp,effect$dose),xlab = "Dose",ylab = "Expression Level",col = "green")

fitexp<-lm(exp~tissuetype + time + dose, data = effect)
fitexp

## 
## Call:
## lm(formula = exp ~ tissuetype + time + dose, data = effect)
## 
## Coefficients:
## (Intercept)  tissuetype2  tissuetype3  tissuetype4   timeTime-2  
##       69.62        -1.50        -2.25        -0.50         6.25  
##  timeTime-3   timeTime-4        doseB        doseC        doseD  
##        4.50         5.00        20.00        11.00        22.75

anova(fitexp)

## Analysis of Variance Table
## 
## Response: exp
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## tissuetype  3   12.19    4.06  0.2889 0.8322001    
## time        3   89.19   29.73  2.1141 0.1998049    
## dose        3 1265.19  421.73 29.9896 0.0005219 ***
## Residuals   6   84.37   14.06                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

A <- c(-1, 1, -1,1)
B <- c(-1, -1, 1, 1)
I <- c(96, 95, 92, 92)
II <- c(78, 75, 75, 77)
III <- c(87, 85, 82, 85)
IV <- c(99, 98, 97, 97)
#Prepare Data Table
data <- data.frame(A, B, I, II, III, IV )
data

##    A  B  I II III IV
## 1 -1 -1 96 78  87 99
## 2  1 -1 95 75  85 98
## 3 -1  1 92 75  82 97
## 4  1  1 92 77  85 97

# Compute sums for (1), (a), (b), (ab)
sums <- apply(data[,3:6], 1, sum)
names(sums) <- c("(1)", "(a)", "(b)", "(ab)")
sums

##  (1)  (a)  (b) (ab) 
##  360  353  346  351

ymean <- sums/4
# Make interaction plots
par(mfrow = c(1,2))
interaction.plot(A, B, ymean)
interaction.plot(B, A, ymean)

y <- c(I, II, III, IV )
y

##  [1] 96 95 92 92 78 75 75 77 87 85 82 85 99 98 97 97

factorA <- as.factor(rep(A,4))
factorB <- as.factor(rep(B,4))
factorA

##  [1] -1 1  -1 1  -1 1  -1 1  -1 1  -1 1  -1 1  -1 1 
## Levels: -1 1

factorB

##  [1] -1 -1 1  1  -1 -1 1  1  -1 -1 1  1  -1 -1 1  1 
## Levels: -1 1

model <- lm(y ~ factorA + factorB + factorA*factorB)
anova(model)

## Analysis of Variance Table
## 
## Response: y
##                 Df  Sum Sq Mean Sq F value Pr(>F)
## factorA          1    0.25   0.250  0.0027 0.9595
## factorB          1   16.00  16.000  0.1720 0.6857
## factorA:factorB  1    9.00   9.000  0.0967 0.7611
## Residuals       12 1116.50  93.042

# Perform residual analysis
par(mfrow = c(1,2))
# Q-Q plot
qqnorm(model$residuals)
qqline(model$residuals)
# residual plot
plot(model$fitted.values,model$residuals)
abline(h = 0)


library(multtest)

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

# Enter p-values obtained from t-test
rawp<-c(0.020970375, 0.172992683, 0.489943608, 0.657616998, 
        0.716432262, 0.619406303, 0.0783585, 0.308670017, 
        0.477310852, 0.831505785, 0.664150848, 0.810859631, 
        0.296222829, 0.718301948, 0.683072811, 0.146935842, 
        0.751398362, 0.327199941, 0.472351013, 0.146701021)
mt.rawp2adjp(rawp, proc = c("Bonferroni", "Holm", "Hochberg", "SidakSS", "SidakSD", "BH", "BY"))

## $adjp
##             rawp Bonferroni      Holm  Hochberg   SidakSS   SidakSD
##  [1,] 0.02097037  0.4194075 0.4194075 0.4194075 0.3454894 0.3454894
##  [2,] 0.07835850  1.0000000 1.0000000 0.8315058 0.8044578 0.7878327
##  [3,] 0.14670102  1.0000000 1.0000000 0.8315058 0.9581183 0.9424796
##  [4,] 0.14693584  1.0000000 1.0000000 0.8315058 0.9583482 0.9424796
##  [5,] 0.17299268  1.0000000 1.0000000 0.8315058 0.9776031 0.9521205
##  [6,] 0.29622283  1.0000000 1.0000000 0.8315058 0.9991114 0.9948533
##  [7,] 0.30867002  1.0000000 1.0000000 0.8315058 0.9993781 0.9948533
##  [8,] 0.32719994  1.0000000 1.0000000 0.8315058 0.9996388 0.9948533
##  [9,] 0.47235101  1.0000000 1.0000000 0.8315058 0.9999972 0.9995343
## [10,] 0.47731085  1.0000000 1.0000000 0.8315058 0.9999977 0.9995343
## [11,] 0.48994361  1.0000000 1.0000000 0.8315058 0.9999986 0.9995343
## [12,] 0.61940630  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [13,] 0.65761700  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [14,] 0.66415085  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [15,] 0.68307281  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [16,] 0.71643226  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [17,] 0.71830195  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [18,] 0.75139836  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [19,] 0.81085963  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
## [20,] 0.83150579  1.0000000 1.0000000 0.8315058 1.0000000 0.9998324
##              BH BY
##  [1,] 0.4194075  1
##  [2,] 0.6919707  1
##  [3,] 0.6919707  1
##  [4,] 0.6919707  1
##  [5,] 0.6919707  1
##  [6,] 0.8179999  1
##  [7,] 0.8179999  1
##  [8,] 0.8179999  1
##  [9,] 0.8315058  1
## [10,] 0.8315058  1
## [11,] 0.8315058  1
## [12,] 0.8315058  1
## [13,] 0.8315058  1
## [14,] 0.8315058  1
## [15,] 0.8315058  1
## [16,] 0.8315058  1
## [17,] 0.8315058  1
## [18,] 0.8315058  1
## [19,] 0.8315058  1
## [20,] 0.8315058  1
## 
## $index
##  [1]  1  7 20 16  2 13  8 18 19  9  3  6  4 11 15  5 14 17 12 10
## 
## $h0.ABH
## NULL
## 
## $h0.TSBH
## NULL

Bioinformatics

Kushan De Silva

January 3, 2018