Applied Statistics and Probabilities

Chapter 1
- Code 1-1
- Code 1-2
- Code 1-3
- Code 1-4
- Code 1-5
- Code 1-6
Chapter 2
Chapter 3
- Example 3-8
Chapter 4
Chapter 6
- Code 6-1
- Code 6-2
- Code 6-3
- Code 6-4
Chapter 7
- Code 7-1
- Code 7-1
- Code 7-2
- Code 7-3
- Code 7-4
- Code 7-5
- Code 7-6
- Code 7-7
Chapter 8
- Code 8-1
- Code 8-2
- Code 8-3
- Code 8-4
Chapter 9
Chapter 10
- Code 10-1
- Code 10-2
- Code P201
- Code 10-3
- Code 10-4
Chapter 10
- Code 11-1
- Code 11-2
- Code 11-3
- Code 11-4
- Code 11-5
Chapter 12
- Code 12-1
- Code P256
- Code P257
- Code P259
- Code P263
- Code P265
- Code P266
Chapter 13
- Code 13-1
- Code 13-2
- Code P278
- Code 13-3
- Code 13-4
- Code 13-5
- Code P285
- Code P288 - 1
- Code P288 - 2
- Code P289
- Code P293
- Code 13-6
- Code 13-7
- Code 13-8
- Code P311
- Code 13-9
- Code 13-10
- Code 13-11
- Code 13-12
- Code 13-13
- Code P331
- Code P333
- Code P337
- Code P340
- Code P341
Chapter 14

R codes

Chapter 1

Code 1-1

x <- c(23.5, 19.6, 18.4, 18.2, 22.5, 13.3, 23.5, 20.5,
       23.2, 19.5,  27.2, 22.5, 22.9, 21.8, 20.4, 17.5,
       24.3, 22.2, 15.5, 16.7)

stem(x, scale = 3, width = 200, atom = 1e-08)

## 
##   The decimal point is at the |
## 
##   13 | 3
##   14 | 
##   15 | 5
##   16 | 7
##   17 | 5
##   18 | 24
##   19 | 56
##   20 | 45
##   21 | 8
##   22 | 2559
##   23 | 255
##   24 | 3
##   25 | 
##   26 | 
##   27 | 2

Code 1-2

library(UsingR)

a <- c(19.5,23.2, 20.5, 23.5, 13.3,22.5,18.2,18.4,19.6,
23.5,16.7,15.5,22.2,24.3,17.5,20.4,21.8,22.9,22.5,27.2)

par(mar = c(2, 1.5,0, 1)) 

DOTplot(a)

Code 1-3

set.seed(15051)
x <- round(runif(1000, 0, 100))
head(x)

## [1] 73 44  4  2  3 78

quantile(x, na.rm = TRUE)

##   0%  25%  50%  75% 100% 
##    0   23   50   75  100

quantile(x, probs = 0.5)

## 50% 
##  50

quantile(x, probs = seq(0, 1, 1/3))

##        0% 33.33333% 66.66667%      100% 
##         0        34        68       100

quantile(x, probs = seq(0, 1, 1/4))

##   0%  25%  50%  75% 100% 
##    0   23   50   75  100

boxplot(Sepal.Length ~ Species, outpch = NA,
        data=iris, col='lightyellow')

Code 1-4

x1 <- c(9, 5, 2, 7, 3, 6, 4, 5)   
w1 <- c(2, 3, 1, 5, 7, 1, 3, 7)    
weighted.mean(x1, w1)

## [1] 4.965517

x <- c(8, 9, 4, 1, 6, 4, 6, 2, 5)  
exp(mean(log(x)))

## [1] 4.209156

# install.packages("psych")         
library("psych")                  
geometric.mean(x)

## [1] 4.209156

x <- c(8, 9, 4, 1, 6, 4, 6, 2, 5) 
harmonic.mean(x)

## [1] 3.249749

Code 1-5

set.seed(213) 

x_norm <- rnorm(5000) 

hist(x_norm, prob = TRUE) 
lines(density(x_norm), col = 2, lwd = 3)

# install.packages("e1071")  
library(e1071)
# library(help=e1071)

skewness(x_norm)

## [1] 0.01792106

kurtosis(x_norm)

## [1] 0.1117041

Code 1-6

x <- c(2, 7, 7, 4, 5, 1, 3)
var(x)

## [1] 5.47619

var_pop <- function(x) {  
mean((x - mean(x))^2)
}
var_pop(x)

## [1] 4.693878

sd(x)

## [1] 2.340126

Chapter 2

Code P38

factorial(4)

## [1] 24

Code P39

perm <- function(n,k){choose(n,k) * factorial(k)}
perm(5,3)

## [1] 60

Code Code 1-1

library(ggvenn)

d <- tibble(value   = c(1, 2, 3, 5, 6, 7, 8, 9),
`Set 1` = c(T, F, T, T, F, T, F, T),
`Set 2` = c(T, F, F, T, F, F, F, T),
`Set 3` = c(T, T, F, F, F, F, T, T),
`Set 4` = c(F, F, F, F, T, T, F, F))
ggvenn(d, c("Set 1", "Set 2", "Set 3", "Set 4"))

x <- list(
A = c(2,4,6), 
B = c(1,2,3),
C = c(2,3,4,5,6))
ggvenn(x, 
fill_color = c("#0073C2FF", "#EFC000FF", "red"),)

#stroke_size = 0.5, set_name_size = 3

Chapter 3

Example 3-8

dpois(c(0,3), lambda=4)

## [1] 0.01831564 0.19536681

Chapter 4

Code 4-1

pnorm(1.96)

## [1] 0.9750021

pnorm(1.96, lower.tail = FALSE)

## [1] 0.0249979

qnorm(0.05)

## [1] -1.644854

qnorm(0.975)

## [1] 1.959964

alpha <- c(0.1, 0.05, 0.01, 0.005, 0.001)
qnorm(1 - alpha)

## [1] 1.281552 1.644854 2.326348 2.575829 3.090232

pnorm(1.5) - pnorm(-1)

## [1] 0.7745375

pnorm(13, 12, 3)

## [1] 0.6305587

Code 4-2

pchisq(3.84, df = 1)

## [1] 0.9499565

pf(4.43, 3, 5)

## [1] 0.9286377

1 - pf(4.43, 3, 5)

## [1] 0.07136232

pt(2.1, 15)

## [1] 0.9734724

1 - pt(2.1, 15)

## [1] 0.02652763

alpha <- c(0.1, 0.05, 0.01, 0.005, 0.001)
qnorm(1 - alpha)

## [1] 1.281552 1.644854 2.326348 2.575829 3.090232

qt(1 - alpha, 15)

## [1] 1.340606 1.753050 2.602480 2.946713 3.732834

pnorm(1.5) - pnorm(-1)

## [1] 0.7745375

pbinom(7, 10,0.5)

## [1] 0.9453125

Code 4-3

a <- c(17.5,17.63, 17.75, 17.86, 17.9, 17.96,
       18, 18.15, 18.22, 18.25)
qqnorm(a);qqline(a, col="red")

shapiro.test(a)

## 
##  Shapiro-Wilk normality test
## 
## data:  a
## W = 0.96262, p-value = 0.8153

library(ggplot2)
library(ggpubr)
ggqqplot(a)

Chapter 6

Code 6-1

qnorm(0.05, 24.2, 3.33/6); qnorm(0.95, 24.2, 3.33/6)

## [1] 23.28711

## [1] 25.11289

Code 6-2

a <- c(2.1, 2.7, 3.4, 0.8, 3.0, 1.7)
t.test(a, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  a
## t = 5.8901, df = 5, p-value = 0.002005
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  1.286830 3.279837
## sample estimates:
## mean of x 
##  2.283333

Code 6-3

a <- c(13.9, 13.5,12.4, 12.7, 13.0, 12.8, 12.4, 13.4, 12.9)
t.test(a, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  a
## t = 76.485, df = 8, p-value = 9.516e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  12.60805 13.39195
## sample estimates:
## mean of x 
##        13

t.test(a, mu = 13.5)

## 
##  One Sample t-test
## 
## data:  a
## t = -2.9417, df = 8, p-value = 0.01866
## alternative hypothesis: true mean is not equal to 13.5
## 95 percent confidence interval:
##  12.60805 13.39195
## sample estimates:
## mean of x 
##        13

Code 6-4

prop.test(637,1000,correct=FALSE)

## 
##  1-sample proportions test without continuity correction
## 
## data:  637 out of 1000, null probability 0.5
## X-squared = 75.076, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.6067244 0.6662270
## sample estimates:
##     p 
## 0.637

Chapter 7

Code 7-1

library(EnvStats)
ciTableMean(n1 = 100, diff.or.mean = 993, SD = 35,
sample.type = "one.sample", ci.type = "upper",
conf.level = 0.95, digits = 1)

##            Mean=993
## SD=35 [-Inf, 998.8]

Code 7-1

2*(1 - pnorm(1.25))

## [1] 0.2112995

Code 7-2

2*(1 - pnorm(1.25))

## [1] 0.2112995

Code 7-3

a <- c(2.1, 2.7, 3.4, 0.8, 3.0, 1.7)
t.test(a, mu = 2, alternative = "greater")

## 
##  One Sample t-test
## 
## data:  a
## t = 0.73089, df = 5, p-value = 0.2488
## alternative hypothesis: true mean is greater than 2
## 95 percent confidence interval:
##  1.502186      Inf
## sample estimates:
## mean of x 
##  2.283333

Code 7-4

sigma <- 1.6 
mu0 <- 1000
mu1 <- 1005
alpha <- 0.05
crit <- qnorm(1-alpha/2, mu0, sigma)
(pow <- pnorm(crit, mu1, sigma, lower.tail=FALSE))

## [1] 0.8779978

(beta <- 1-pow)

## [1] 0.1220022

xLims <- c(993, 1013)
left  <- seq(xLims[1],   crit, length.out=100)
right <- seq(crit, xLims[2],   length.out=100)
yH0r  <- dnorm(right, mu0, sigma)
yH0l  <- dnorm(left,  mu0, sigma)
yH1l  <- dnorm(left,  mu1, sigma)
yH1r  <- dnorm(right, mu1, sigma)

curve(dnorm(x, mu0, sigma), xlim=xLims, lwd=2,
col="red", xlab="x", ylab="density", ylim=c(0, 0.27),
xaxs="i")
curve(dnorm(x, mu1, sigma), lwd=2, col="blue",
add=TRUE)
polygon(c(right, rev(right)), c(yH0r, numeric(length
(right))), border=NA,col=rgb(1, 0.3, 0.3, 0.6))

x<- seq(from = 993, to = 996.8, by = 
((996.8 - 993)/(100 - 1)))
y <- dnorm(x,1000,1.6)
y<-dnorm(x,1000,1.6)
polygon(c(993,x,996.8),c(0,y,0),border=NA,  col=rgb(1, 0.3, 0.3, 0.6))
polygon(c(left,  rev(left)),  c(yH1l, numeric(length
(left))),  border=NA,col=rgb(0.3, 0.3, 1, 0.6))
polygon(c(right, rev(right)), c(yH1r, numeric(length
(right))), border=NA,
density=5, lty=2, lwd=2, angle=45, col="darkgray")

abline(v=crit, lty=1, lwd=3, col="red")
text(crit-2,0.26,adj=0,label="critical value")
text(mu0-1.2,0.2, adj=1,label="distribution under H0")
text(mu1+1.4,0.2, adj=0,label="distribution under H1")
text(crit+1.3,0.07, adj=0,label="power", cex=1.3)
text(crit-5, 0.05,  expression(beta),  cex=1.3)
text(crit-3, 0.05,  "= 0.122",  cex=1.3)
text(crit+2, 0.02, 0.025, cex=1.3)

Code 7-5

x <- (0 : 120) / 10
plot(x, dchisq(x, 4), type = "l",
ylab = "Chi -squared density")
lines(x, dchisq(x, 5))
lines(x, dchisq(x, 6))
lines(x, dchisq(x, 7))
lines(x, dchisq(x, 8))
text(c(3.1,4.3,5.3,6.4, 8.1),c(.18 ,.153 ,.137 ,.123 ,.11),
labels = c("4 df", "5 df", "6 df", "7 df", "8 df"))

Code 7-6

library(EnvStats)
x <- c(140, 210, 110, 170, 90, 180, 250, 200, 125, 75)
varTest(x = x, alternative ="two.sided", sigma.squared
=10000, conf.level = 0.05)

## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 variance = 10000
## 
## Alternative Hypothesis:          True variance is not equal to 10000
## 
## Test Name:                       Chi-Squared Test on Variance
## 
## Estimated Parameter(s):          variance = 3188.889
## 
## Data:                            x
## 
## Test Statistic:                  Chi-Squared = 2.87
## 
## Test Statistic Parameter:        df = 9
## 
## P-value:                         0.06154686
## 
## 5% Confidence Interval:          LCL = 3337.267
##                                  UCL = 3547.141

Code 7-7

# lcl = (n - 1) * sigma^2 / qchisq(p = alpha / 2, df = df, lower.tail = FALSE)
# ucl = (n - 1) * sigma^2 / qchisq(p = alpha / 2, df = df, lower.tail = TRUE)

lcl = (9) * 3.06/ qchisq(p = 0.05 / 2, df = 9,
                         lower.tail = FALSE)
ucl = (9) * 3.06/ qchisq(p = 0.05 / 2, df = 9,
                         lower.tail = TRUE)
paste(round(lcl ,3),"-", round(ucl ,3))

## [1] "1.448 - 10.199"

Chapter 8

Code 8-1

chisq.test(c(115,85))

## 
##  Chi-squared test for given probabilities
## 
## data:  c(115, 85)
## X-squared = 4.5, df = 1, p-value = 0.03389

Code 8-2

observed <- c(1178, 291, 273, 156)
expected <- c(9/16, 3/16, 3/16, 1/16)
chisq.test(observed , p = expected)

## 
##  Chi-squared test for given probabilities
## 
## data:  observed
## X-squared = 54.313, df = 3, p-value = 9.623e-12

Code 8-3

x <- rep(0:4, times = c(2,6,10,10,7)) 
mean(x)

## [1] 2.4

# 3 محاسبه احتمال مقادیر از صفر تا فقط 
probs = dpois(0:3, lambda=mean(x)) 
probs

## [1] 0.09071795 0.21772309 0.26126771 0.20901416

# محاسبه احتمال مقادیر 4 و بیشتر
complement = 1-sum(probs) 
#آزمون کای‌دو
chisq.test(x=c(2,6,10,10,7), p=c(probs, complement),
simulate.p.value=TRUE)

## 
##  Chi-squared test for given probabilities with simulated p-value (based
##  on 2000 replicates)
## 
## data:  c(2, 6, 10, 10, 7)
## X-squared = 1.9162, df = NA, p-value = 0.7596

Code 8-4

drug <- matrix(c(75,65,25, 35), nrow = 2)
chisq.test(drug , correct = FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  drug
## X-squared = 2.381, df = 1, p-value = 0.1228

chisq.test(drug , correct = TRUE)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  drug
## X-squared = 1.9286, df = 1, p-value = 0.1649

Chapter 9

Code 9-1

# ورود داده‌ها
d0 <- data.frame(y1 = c(4,5.2,5.7,4.2,4.8,3.9,4.1,3,4.6,6.8), 
                y2 = c(4.4,4.2,5.2,2.7,4.7,4.8,4,2.4,3.6,5.3))

var(d0)

##           y1        y2
## y1 1.1401111 0.7423333
## y2 0.7423333 0.9667778

# آزمون نرمال بودن داده‌های هر نمونه
shapiro.test(d0$y1)

## 
##  Shapiro-Wilk normality test
## 
## data:  d0$y1
## W = 0.95567, p-value = 0.7356

shapiro.test(d0$y2)

## 
##  Shapiro-Wilk normality test
## 
## data:  d0$y2
## W = 0.92527, p-value = 0.403

library(reshape2)
d <- melt(d0)
var.test(value ~ variable, data = d)

## 
##  F test to compare two variances
## 
## data:  value by variable
## F = 1.1793, num df = 9, denom df = 9, p-value = 0.81
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2929189 4.7478136
## sample estimates:
## ratio of variances 
##            1.17929

bartlett.test(value ~ variable, data = d)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  value by variable
## Bartlett's K-squared = 0.057905, df = 1, p-value = 0.8098

t.test(d0$y1,d0$y2, data=tips, var.equal=TRUE)

## 
##  Two Sample t-test
## 
## data:  d0$y1 and d0$y2
## t = 1.0893, df = 18, p-value = 0.2904
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4643413  1.4643413
## sample estimates:
## mean of x mean of y 
##      4.63      4.13

library(plyr)
summary <- ddply(d, "variable", summarize,
                 mean=mean(value), d.sd=sd(value),
                 Lower=mean - 1.96*d.sd/sqrt(NROW(d)),
                 Upper=mean + 1.96*d.sd/sqrt(NROW(d)))
summary

##   variable mean      d.sd    Lower    Upper
## 1       y1 4.63 1.0677599 4.162034 5.097966
## 2       y2 4.13 0.9832486 3.699072 4.560928

library(ggplot2)
ggplot(summary, aes(mean, y = variable)) + geom_point() +
  geom_errorbarh(aes(xmin=Lower, xmax=Upper), height = 0.2)

Code 9-2

# عبارات m1 و m2 میانگین نمونه ها هستند.  
# عبارات s1 و s2 انحراف معیار نمونه‌ها هستند.
# عبارات n1 و n2 حجم نمونه‌ها هستند. 
# فرض صفر، برابری میانگین‌ها است. 
# در صورت عدم وجود فرض برابری واریانس‌ها: 

t.test2 <- function(m1,m2,s1,s2,n1,n2,m0=0,
                    equal.variance=FALSE)
{
if( equal.variance==FALSE ) 
{
se <- sqrt( (s1^2/n1) + (s2^2/n2) )
# محاسبه درجه آزادی با رابطه ساترویت 
df <- ( (s1^2/n1 + s2^2/n2)^2 )/( (s1^2/n1)^2/(n1-1) 
        + (s2^2/n2)^2/(n2-1))
} else
{
# محاسبه انحراف معیار تفاوت دو میانگین
se <- sqrt( (1/n1 + 1/n2) * ((n1-1)*s1^2 + (n2-1)*s2^2)/(n1+n2-2)) 
df <- n1+n2-2
}      
t <- (m1-m2-m0)/se 
dat <- c(m1-m2, se, t, 2*pt(-abs(t),df))    
names(dat) <- c("Difference of means", "Std Error", "t", "p-value")
return(dat) 
}

# مشاهده می‌شود که خروجی با خروجی t.test بر روی x1 و x2 همخوانی دارد. 
tt2 <- t.test2(4000, 3700, sqrt(38000), sqrt(41200), 
               11, 11, equal.variance=TRUE)
round(tt2,4)

## Difference of means           Std Error                   t             p-value 
##            300.0000             84.8528              3.5355              0.0021

Code 9-3

d <- data.frame(group = rep(c(1,2), each = 10),
                y = c(3,7,25,10,15,6,12,25,15,7,48,44,40,38,33,21,20,12,1,18))

shapiro.test(d[d$group == 1 ,]$y)

## 
##  Shapiro-Wilk normality test
## 
## data:  d[d$group == 1, ]$y
## W = 0.89361, p-value = 0.1861

library(ggplot2)
ggplot(d, aes(sample = y, colour = factor(group ))) +
   stat_qq() +
   stat_qq_line()

var.test(y ~ group , data = d)

## 
##  F test to compare two variances
## 
## data:  y by group
## F = 0.24735, num df = 9, denom df = 9, p-value = 0.04936
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.06143758 0.99581888
## sample estimates:
## ratio of variances 
##          0.2473473

# or bartlett.test(y ~ group , data = d)

t.test(y ~ group , data=d, var.equal=FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  y by group
## t = -2.7669, df = 13.196, p-value = 0.01583
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -26.694067  -3.305933
## sample estimates:
## mean in group 1 mean in group 2 
##            12.5            27.5

Code 9-3 (Part 2)

g <- data.frame(group = rep(c("a","b"), each = 10),
                y = c(3,7,25,10,15,6,12,25,15,7,48,44,40,38,33,21,20,12,1,18))
g$group <- as.factor(g$group)


ggplot(g, aes(x = group , y = y)) +
  geom_boxplot(aes(fill = group )) +
  geom_jitter(color="black",
              position=position_jitter(0.2)) +
  theme(legend.position = c(0.1, 0.85),
        legend.text = element_text(size=10, face="bold"),
        legend.background = element_rect(fill="transparent"))

Code 9-4

a <- c(4.0,5.2,5.7,4.3,4.8,3.9,4.1,3.0,4.6,6.8)
b <- c(4.4,4.2,5.2,2.7,4.7,4.8,4.0,2.4,3.6,5.3)
t.test(a, b, paired=TRUE)

## 
##  Paired t-test
## 
## data:  a and b
## t = 2.0074, df = 9, p-value = 0.07564
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.06471457  1.08471457
## sample estimates:
## mean difference 
##            0.51

Code 9-5

prop.test(x = c(546, 475), n = c(1000, 1000))

## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(546, 475) out of c(1000, 1000)
## X-squared = 9.8043, df = 1, p-value = 0.001741
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.02629417 0.11570583
## sample estimates:
## prop 1 prop 2 
##  0.546  0.475

Chapter 10

Code 10-1

# From the integers 1:10, draw 6 randon numbers
sample(x = 1:10, size = 6)

## [1] 10  5  6  9  1  8

Code 10-2

d <- read.table(text ="
L8 L16 H8 H16
7   12 14 19
8   17 18 25
15  13 19 22
11  18 17 23
9   19 16 18
10  15 18 20", header = T)

library(reshape2)
d <- melt(d)
colnames(d) <- c('treat ', 'y')
d$treat <- factor(d$treat)
anovaCRD <- aov(y ~ treat, data = d)
summary(anovaCRD)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## treat        3  382.8  127.60   19.61 3.59e-06 ***
## Residuals   20  130.2    6.51                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

library(dplyr)
d2 <- summarise(group_by(d, treat), mean = mean(y),
                sd = sd(y))
d2 <- data.frame(d2, temp = c("H", "H", "L", "L"),
                 daylength = c("16h", "8h", "16h", "8h"))
d2$temp <- as.factor(d2$temp)
d2

##       mean       sd temp daylength
## 1 15.95833 4.722556    H       16h
## 2 15.95833 4.722556    H        8h
## 3 15.95833 4.722556    L       16h
## 4 15.95833 4.722556    L        8h

d2$daylength <- factor(d2$daylength , levels
                      = c("8h", "16h"))
# بردار برای انتخاب رن گهای دلخواه در قالب ی colourpicker نصب بسته
library(ggplot2)
ggplot(d2, aes(daylength , mean , group =temp, fill=temp)) +
  geom_col(color= "black", position = "dodge") +
  scale_fill_manual(values = c("#BFEFFF", "#EED5D2")) +
  theme_bw() +
  geom_errorbar(aes(daylength , ymin = mean -sd, ymax=mean+sd), width=0.1, position
                = position_dodge(width = 0.9)) +
  theme(axis.text.x = element_text(size=12, color = "black", angle = 45,
                                   hjust = 1), axis.text.y = element_text(size=12,
                                                                          color = "black", angle = 0, hjust = 1),
        axis.title = element_text(size=12,face="italic")) +
  ggtitle("mean of treatments") +
  xlab("daylength") + ylab("growth (cm)") +
  theme(legend.position = c(0.87, 0.85), legend.text =
          element_text(colour="black", size=10, face="bold"),
        legend.background = element_rect(fill="transparent"))

Code P201

d0 <- read.table(text ="
L8 L16 H8 H16
7   12 14 19
8   17 18 25
15  13 19 22
11  18 17 23
9   19 16 18
10  15 18 20", header = T)

library(reshape2)
d <- melt(d0)
colnames(d) <- c('treat', 'y')

par(mfrow=c(1,2))
pairwise.t.test(d$y, d$treat , p.adjust.method = "none")

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  d$y and d$treat 
## 
##     L8      L16     H8     
## L16 0.00101 -       -      
## H8  0.00012 0.37611 -      
## H16 2.6e-07 0.00131 0.01037
## 
## P value adjustment method: none

plot.design(d)
plot(TukeyHSD(anovaCRD , conf.level = 0.95), las = 2)

Code 10-3

d0 <- read.table(text = "
A  B  C  D
4  8 25 33
5 11 28 21
2  9 20 48
5 12 15 18
1  7 30 31", header = T)
library(reshape2)
d <- melt(d0)
colnames(d) <- c('treat', 'y')
# aov تجزیەی واریانسبا استفاده از تابع
anovaCRD <- aov(y ~ treat , data = d)
summary(anovaCRD)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## treat        3 2300.2   766.7   16.61 3.59e-05 ***
## Residuals   16  738.4    46.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# lm تجزیەی واریانسبا استفاده از تابع
anovaCRDlm <- lm(y ~ treat , data = d)
anova(anovaCRD)

## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## treat      3 2300.2  766.72  16.614 3.595e-05 ***
## Residuals 16  738.4   46.15                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

par(mfrow=c(2,2))
plot(anovaCRDlm)

anovaCRDlm <- lm(log10(y) ~ treat , data = d)
anova(anovaCRDlm)

## Analysis of Variance Table
## 
## Response: log10(y)
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## treat      3 3.06895 1.02298  28.096 1.298e-06 ***
## Residuals 16 0.58257 0.03641                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

par(mfrow=c(2,2))
plot(anovaCRDlm)

Code 10-4

library("car")
d0 <- read.table(text ="
H16 H8 L16 L8
209 14 62 2
411 38 37 1
187 49 13 4
50  77 28 1
130 16 49 2
263 88 35 1", header = T)

library(reshape2)
d <- melt(d0)
dim(d)
d <- data.frame(d, r = rep(1:6,4))
colnames(d) <- c('t', 'y', 'r')
d$t <- factor(d$t)
# تجزیه واریانس
b1 <- aov(y ~ t, data = d)
par(bg = NA)
par(mfrow = c(2,2))
par(mar = c(2, 4.5, 3, 1))
plot(b1)

summary(b1)
shapiro.test(b1$residuals)

Chapter 10

Code 11-1

# ایجاد داده‌ها# ایجاد داده‌ها
d <- data.frame(x = c(1,2,3,4,5), y = c(1,1,2,2,4))  

# محاسبه ضریب همبستگی و مدل رگرسیون خطی ساده بین x و y 
cor(d$x,d$y)
model <- lm(y ~ x, data = d)
# ایجاد نمودار نقطه‌ای و خط رگرسیونی
plot(d$x, d$y, xlab= 'x', pch = 21, bg = 'red')
abline(model)

# کاربرد تابع lm ساده شده برای رسم فاصله اطمینان و فاصله پیش‌بینی 
library(UsingR)
simple.lm(d$x, d$y, show.ci=TRUE, conf.level=0.90)

# تابع simple.lm در بسته UsingR رگرسیون خطی ساده را به همراه  فاصله اطمینان و فاصله پیش‌بینی برای سطح معنی‌داری مورد نظر را رسم می‌کند.

# تجزیه رگرسیون و مشاهده خروجی
summary(model)

# مشاهده جدول تجزیه واریانس 
anova(model)  

# پیش‌بینی y برای یک مقدار x به همراه فاصله اطمینان
predict(model, data.frame(x = c(3.5, 4.5)), se.fit = TRUE, interval = "confidence", level = 0.95)

# پیش‌بینی y برای یک مقدار x به همراه فاصله پیش‌بینی 
predict(model, data.frame(x = c(3.5, 4.5)), se.fit = TRUE, interval = "prediction", level = 0.95)

# رسم نمودارهای تشخیصی
par(mfrow=c(2,2))
plot(model)

Code 11-2

d <- data.frame(x = c(1,2,3,4,5,6,7,8), 
                y = c(2,3,5,8,12,17,24,38))

cor(d$x,d$y)
model <- lm(y ~ x, data = d)
summary(model)

# رسم نمودارهای تشخیصی 
par(mfrow=c(2,2))
plot(model)

par(mfrow=c(1,1))

# نمودار پراکنش داده‌ها
plot(d$x, d$y, xlab= 'x', pch = 21, bg = 'red')
abline(model)

# نرمال بودن خطاها را می‌توان با دستورات زیر بررسی کرد
# استخراج مانده‌ها از آبجکت lm و بررسی نرمال بودن آن‌ها
model$residuals
shapiro.test(model$residuals)

# خلاصه مدل و نمودار پراکنش داده‌ها پس از تبدیل لگاریتمی متغیر وابسته در زیر نشان داده شده است. مشاهده می‌شود استفاده از تبدیل لگاریتمی ضریب تبیین داده‌ها را از  0.85  به  0.996 افزایش داده است. برای مقایسه بیشتر، می‌توان نمودارهای تشخیصی را با دستور plot(model) رسم و با حالت عدم تبدیل مقایسه کرد.
model <- lm(log(y) ~ x, data = d)

plot(d$x, log(d$y), xlab= 'x', pch = 21, bg = 'red')
abline(model)

# تبدیل بهینه داده‌ها با استفاده از تبدیل Box-Cox. مشاهده می‌شود با این تبدیل مقدار ضریب تبیین از روش تبدیل لگاریتمی هم بیشتر شده و به $ 0.998 $ افزایش یافته است.
# محاسبه مقدار بهینه لامبدا برای انجام تبدیل boxcox 
# library(MASS)
# bc <- boxcox(d$y ~ d$x)
# (lambda <- bc$x[which.max(bc$y)])

# برازش خط رگرسیونی جدید پس از تبدیل boxcox
# new_model <- lm(((d$y^lambda-1)/lambda) ~ d$x)
# par(mfrow=c(2,2))
# plot(new_model)
# summary(new_model)

Code 11-3

d <- data.frame(x = c(1,2,3,4,5), y = c(1,1,2,2,4))
model1 <- lm(y ~ -1 + x, data = d)
model2 <- lm(y ~ x, data = d)

# عدم وجود تفاوت معنی‌دار بین دو مل نشان می‌دهد که عرض از مبدأ تفاوت معنی‌داری با صفر ندارد. روش دوم استفاده از تابع AIC() و BIC() به‌صورت زیر است. کوچک‌تر بودن مقدار AIC در مدل فاقد عرض از مبدأ نشان‌دهنده برتری این مدل نسبت به مدل دارای عرض از مبدأ است.
anova(model1, model2)
AIC(model1, model2)
BIC(model1, model2)

Code 11-4

data <- read.table(text ="
water nutr yield
11 50 2841
8 21 1876
10 38 2934
10 18 1552
12 43 3065
12 65 3670
5  50 2005
8  48 3215
8  17 1930
6  70 2010
9  20 3111
9  29 2882
5  15 1683
7  14 1817
13 60 4066", header = T)
model = lm(yield ~ water + nutr , data = data)
summary(model)

Code 11-5

d <- data.frame(
temp = c(29.2, 28.1, 27.2, 26.4, 26.3, 26.2, 26.2,
26, 25.9, 25.7, 24.5, 24.4, 24, 23.9, 23.9, 23.7,
23.7,23.7, 23.6, 23.5, 34.2, 23.4, 23.2, 23.1, 23.1,
23, 22.9, 22.5, 22.5, 22.4, 21.7, 21.2, 20, 19.2, 19,
19, 18.8, 18, 18, 17.4),
yield = c(2.3, 3.1, 2.8, 2.4, 3.6, 2.3, 3.8, 3.1,
2.4, 2.1, 3.5, 3.8, 3.1, 2.9, 3.2, 3, 4.5, 3.5, 3.3,
3.7, 3, 4.4, 3.1, 2.6, 4.8, 3.2, 3.3, 3.1,3.4, 3.2,
4.2, 4.5, 4.7, 5, 6.2, 6, 6.1, 7.3, 6.6, 6.2))

model1 = lm(yield ~ temp + I(temp^2), data = d)
summary(model1)

library("ggplot2")
ggplot(d, aes(temp, yield)) +    
     geom_point() + 
     geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE) # افزودن منحنی رگرسیونی

model2 = lm(yield ~ temp, data = d)
summary(model2)

Chapter 12

Code 12-1

# روش اول 
a <- c(12, 14, 25, 31, 11, 16, 52, 19, 9, 25, 15, 21, 18, 12, 15, 40)
binom.test(11, 14)

# روش دوم
library(BSDA)
SIGN.test(a, y = NULL, md = 25, alternative = "two.sided", conf.level = 0.95)

Code P256

# آزمون Wilcoxon بر روی یک نمونه 
a <- c(2.7, 2.5, 2, 1.7, 2.3, 1.9, 2.4, 2.4, 2.2, 2.6)
wilcox.test(a, mu = 2, alternative = "two.sided", conf.int = F)

Code P257

# امتیاز بوته‌های گوجه‌فرنگی: تیمار A 
a <-c(11,12,5,7,5,5,8,9,16,8)
# امتیاز بوته‌های گوجه‌فرنگی: تیمار B 
b <-c(9,16,9,6,14,16,8,10,14,18)
# ایجاد دیتا‌فریم
my_data <- data.frame(group = rep(c("a", "b"), each = 10), weight = c(a,  b))

wilcox.test(weight ~ group, data = my_data, paired = TRUE)

Code P259

# آزمون رتبه ویلکاکسون بر روی دو نمونه
a <- c(60,62,68,50,78,66)
b <- c(65,91,75,82,83,78,90)
# آزمون U من‌ویتنی بر روی دو نمونه مستقل 
wilcox.test(a,b, conf.int = T)

Code P263

# Kriskal-Walis
t20 <- c(22.2, 17.3, 21.2, 25.2, 16.4)
t25 <- c(24.1, 30.3, 27.4, 26.4, 34.8)  
t30 <- c(25.9, 18.4, 23.2, 21.9, 22.6)
t35 <- c(23.9, 24.8, 28.2, 26.4, 21.7) 
kruskal.test(list(t20,t25,t30,t35))

# همچنین می‌توان به‌صورت زیر عمل کرد: 
x <- c(t20,t25,t30,t35)
g <- factor(rep(1:4, c(5, 5, 5, 5)))
kruskal.test(x, g)

Code P265

# آزمون Friedman برای تجزیه طرح بلوک‌های کامل تصادفی
t1 <- c(7.4, 6.5, 5.6)
t2 <- c(9.8, 6.8, 6.2)
t3 <- c(7.3, 6.1, 6.4)
t4 <- c(9.5, 8.0, 7.4)
y <- c(t1,t2,t3,t4)

t <- factor(rep(1:4, each = 3))
b <- factor(rep(1:3, 4))
c <- data.frame(y,t,b)
c
friedman.test(y~t|b)

Code P266

a <- c(4, 1, 9,5, 2, 10, 7, 3, 6, 8)
b <- c(5, 2, 10, 6, 1, 9, 7, 3, 4, 8)
cor.test(a, b, method = 'spearman')

Chapter 13

Code 13-1

z <- matrix(c(3,7,2,1,4,-7,6,5,1),nrow = 3)
det(z)
solve(z)

Code 13-2

a <- matrix(c (3,2,1,-1,1,2,1,0,-1),3)
c <- matrix(c(2,1,3),3)
x <- solve(a,c)
x

Code P278

# Sample vector
vector <- c(1, 5, 3,4)
# Calculate the length of the vector
vector_length <- sqrt(sum(vector^2))
# dividing each element by the vector length
normalized_vector <- vector / vector_length
# Print the normalized vector
print(normalized_vector)

Code 13-3

library(ggplot2)

df <- data.frame(x=c(2, 1), y=c(-1, 2))

ggplot(df , aes(x,y))+
  geom_abline(aes(slope=0,intercept=0)) +
  geom_vline(xintercept = 0) +
  geom_point(aes(x=0,y=0)) +
  geom_segment(aes(x=0,y=0,xend=2,yend=1), arrow=arrow(length = unit(0.5,"cm"),
             angle=20),lineend = "butt") +
  geom_segment(aes(x=0,y=0,xend=-1,yend=2), arrow=arrow(length = unit(0.5,"cm"),angle=20),
             lineend = "butt",linejoin = "round") +
  theme_minimal ()

Code 13-4

set.seed(3)
a <- sample(1:10, size = 100, replace = TRUE ,
prob = 10:1)
hist(a, main=NULL)

b <- rbeta(1000,5,2)
c <- rbeta(1000,2,5)
d <- rbeta(1000,2,7)
hist(c, main=NULL)

hist(d, freq=FALSE , main=NULL)
lines(density(d), col='red', lwd=3)
abline(v = c(mean(d),median(d)),
col=c("green","red"),lty=c(2,2), lwd=c(3, 3))

hist(sqrt(d), main=NULL)

plot(c, d)

plot(sqrt(c), sqrt(d))

library(ggplot2)
library(GGally)
x <- data.frame (a = sample(1:10, size = 100,
replace = TRUE , prob = 10:1),
b = rbeta(100,5,2),
c = rbeta(100,2,5),
d = rbeta(100,2,7))
ggpairs(x)

library(ggplot2)
pois <- data.frame(Lambda.1=rpois(10000, 1),
Lambda.2=rpois(10000, 2),
Lambda.5=rpois(10000, 5), Lambda.10=rpois(10000, 10),
Lambda.20=rpois(10000, 20))

library(reshape2)
pois <- melt(data=pois, variable.name="Lambda",
value.name="x")
ggplot(pois , aes(x=x)) +
geom_density(aes(group=Lambda , color=Lambda ,
fill=Lambda),
adjust=4, alpha=1/2) +
theme(legend.position = c(0.87, 0.7))

pdf(file = "chisquared.pdf")
x <- (0 : 120) / 10
plot (x, dchisq(x, 4), type = "l")
lines(x, dchisq(x, 5))
lines(x, dchisq(x, 6))
text( c(3, 4.2, 5.2), c(.18, .153, .137),
labels = c("4 df", "5 df", "6 df"))
dev.off()

plot(iris[1:4], pch = 21, bg=c('red','blue','green ',
'tomato ')[ unclass(iris$Species )])

Code 13-5

d <- data.frame(x = c(1,2,3,4,5), y = c(4,7,9,15,14), z = c(2.5,1,3,2,3 ))
cov(d)
cor(d)

Code P285

library(mvtnorm)
par(bg = NA)
par(mfrow= c(1,3))
par(mar = c(4, 4, 1, 1))

# ایجاد دو مجموعه داده تصادفی بامیانگین صفر و ضریب همبستگی برابر با -0.8
d1 <- rmvnorm(500, mean = c(0,0), sigma = matrix(c(1, -.8, -.8, 1), 2,2))
# رسم نمودار نقطه‌ای دو متغیر ایجاد شده: \#}
plot(d1, xlab="x1", ylab="x2", pch = 16, col = "red")

# ایجاد دو مجموعه داده تصادفی بامیانگین صفر و ضریب همبستگی برابر با 0.8
d2 <- rmvnorm(500, mean = c(0,0), sigma = matrix(c(1, .8, .8, 1), 2,2))
# رسم نمودار نقطه‌ای دو متغیر ایجاد شده: \#}
plot(d2, xlab="x1", ylab="x2", pch = 16, col = "red")
# ایجاد دو مجموعه داده تصادفی بامیانگین صفر و ضریب همبستگی برابر با 0
d3 <- rmvnorm(500, mean = c(0,0), sigma = matrix(c(1, 0, 0, 1), 2,2))
# رسم نمودار نقطه‌ای دو متغیر ایجاد شده: \#}
plot(d3, xlab="x1", ylab="x2", pch = 16, col = "red")

Code P288 - 1

cov <- matrix(c(5, 2, 2, 2),2,2)
x <- c(2,6)
mahalanobis(x, c(4,8), cov)

Code P288 - 2

d <- data.frame(f = factor(rep(c("a","b"), each=3)),
                y1 = c(6,10,8,6,7,8),
                y2 = c(9,6,3,5,3,1))
d

t(c(10, 6) - colMeans(d[,2:3])) %*% solve(var(d[,2:3])) %*% (c(10, 6) - colMeans(d[,2:3]))

Code P289

d <- data.frame(f = factor(rep(c("a","b"), each=3)),
                y1 = c(6,10,8,6,7,8),
                y2 = c(9,6,3,5,3,1))

# Calculate Mahalanobis distance
d$mahalanobis <- mahalanobis(d[,2:3], colMeans(d[,2:3]), cov(d[,2:3]))

# Calculate p-values using the Mahalanobis distances
d$pvalue <- pchisq(d$mahalanobis, df = 1, lower.tail=FALSE)

# Display the data frame
d

library(psych)
mardia(d[,2:3], plot=F)

describe(d[,2:3])

library(mvnormtest)
mshapiro.test(t(d[,2:3]))

outlier(d[,2:3], plot = TRUE, bad = 1,na.rm = TRUE)

Code P293

a <- data.frame(y1 = c(0.803, 1.037, -3.255, 2.82, 0.667, -2.072),
                y2 = c(-1.29, -0.172, -0.446, 2.641, 0.291, -1.024))

# محاسبه ماتریس واریانس-کواریانس نمونه 
s <- var(a)
round(s,2)
b <- eigen(s)
b

b <- data.frame(a=c(1,2,3,4,5,4,3),b=c(4,5,4,3,2,3,2),
                c=c(8,7,6,7,9,1,2))
A <- cov(b)
x <- eigen(A)
round(crossprod(x$vectors),3)

Code 13-6

# آزمون  T2   هتلینگ با استفاده از تابع manova
df <- data.frame(f = c("a","a","a","b","b","b"), 
                 y1 = c(6,10,8,6,7,8),
                 y2 = c(9,6,3,5,3,1))
m <- manova(cbind(y1,y2) ~ f, df)
summary(m,test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"))

summary.aov(m)

# آزمون T2 هتلینگ برای مقایسه نمونه با یک بردار میانگین خاص
library(ICSNP)

muH0 <- c(0, 0) 
HotellingsT2(cbind(df$y1, df$y2) ~ df$f, mu = muH0)

muH0 <- c(-1, 2) 
HotellingsT2(cbind(df$y1, df$y2) ~ df$f, mu = muH0)

Code 13-7

library(ggplot2)
library(reshape2)
a <- data.frame(g=c(1,1,1,2,2,3,3,3),
y1=c(9,6,9,0,2,3,1,2),
y2=c(3,2,7,4,0,8,9,7))
a$g <- as.factor(a$g)

am <- melt(a, id = 'g')
am$variable <- factor(am$variable)
ggplot(am, aes(x=g, y=value, fill=variable)) +
geom_boxplot(position = "dodge2")

m <- manova(cbind(y1,y2) ~g, a)
summary(m,test = "Wilks")

summary(m,test = "Pillai")

summary(m,test = "Hotelling-Lawley")
summary(m,test = "Roy")
summary.aov(m)

# Pairwise comparison of the treatments
m1 <- manova(cbind(y1,y2) ~g, a, subset = g %in% c(1, 2))
summary(m1,test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"))

m2 <- manova(cbind(y1,y2) ~g, a, subset = g %in% c(1, 3))
summary(m2,test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"))

# Calculating SSCP matrixes
library(HoRM)
m <- manova(cbind(y1,y2) ~g, a)
SSCP.fn(fits = m)

Code 13-8

d <- read.table(text = "
group var1 var2 var3
a 10.59 10.48 4.73
a 11.05  9.02 5.33
a  9.94 10.77 3.46
a  7.99 15.56 5.15
a  9.08 14.76 5.46
b  9.38 14.41 1.46
b  9.52 12.51 3.00
b 10.81 11.12 5.19
b 10.68  9.76 2.25
b 10.95 11.61 3.97", header = TRUE)

ds <- scale(d[-1], center = T, scale = T)
cov <- round(cov(ds),2)
cov
eigen(cov)

pca <- prcomp(d[-1], center = TRUE , scale. = TRUE)

library(ggplot2)
library(ggbiplot )
library(ggrepel)

pca$label <- rownames(d)
ggbiplot(pca, obs.scale = 1, var.scale = 1, alpha = 0,
         groups = d$group) +
  theme(legend.direction = 'vertical', legend.position = 'right') +
  geom_point(size = 3, shape = 21, aes(fill = groups, color = groups)) +
  geom_text_repel(label=pca$label , colour = "red", size = 4, box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines")) +
  xlim(-3,3) + ylim(-2.1,2) +
  scale_color_manual(values=c("black", "black")) +
  scale_fill_manual(values = c("red", "green")) +
  theme_bw() +
  theme(legend.direction = 'vertical', legend.position = c(0.9, 0.85))

# Using factoextra package
require(factoextra )
library(dplyr )
library(Rcpp)

df <- read.table(text = "
group var1 var2 var3
a 10.59 10.48 4.73
a 11.05  9.02 5.33
a  9.94 10.77 3.46
a  7.99 15.56 5.15
a  9.08 14.76 5.46
b  9.38 14.41 1.46
b  9.52 12.51 3.00
b 10.81 11.12 5.19
b 10.68  9.76 2.25
b 10.95 11.61 3.97", header = TRUE)

res.pca <- prcomp(df[,2:4], scale = TRUE)
fviz_eig(res.pca)

fviz_pca_ind(res.pca, col.ind = "cos2", # Color by the quality of representation
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE) # Avoid text overlapping

fviz_pca_var(res.pca ,
             col.var = "contrib", # Color by contributions to the PC
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE) # Avoid text overlapping

fviz_pca_biplot(res.pca , repel = TRUE ,
                col.var = "#2E9FDF", # Variables color
                col.ind = "#696969") + # Individuals color
theme_bw()

Code P311

# Heatmap
library(pheatmap)
library(ggplot2)
library(ggplotify)
library(patchwork)

d <- read.table(text = "
group  var1   var2 var3
a 10.59 10.48 4.73
a 11.05  9.02 5.33
a  9.94 10.77 3.46
a  7.99 15.56 5.15
a  9.08 14.76 5.46
b  9.38 14.41 1.46
b  9.52 12.51 3.00
b 10.81 11.12 5.19
b 10.68  9.76 2.25
b 10.95 11.61 3.97", header = TRUE)
df <- d[,-1]
annotation <- data.frame(group = d[,1])

p1 <- pheatmap(df, annotation_rows = annotation,
               cluster_rows = T, cluster_cols = T,
               border_color='white ')

p2 <- pheatmap(cor(df), cluster_rows=T,
               cluster_cols = T, border_color = 'white ')

as.ggplot(p1) + as.ggplot(p2)

Code 13-9

b <- data.frame(a=(1:10),
y1=c(2.5,0.5,2.2,1.9,3.1,2.3,2,1,1.5,1.1),
y2=c(2.4,0.7,2.9,2.2,3,2.7,1.6,1.1,1.6,0.9), row.names = 1)

dist <- dist(b, method = "euclidean")

# the ape package can be used for PCoA
library(ape)
PCOA <- pcoa(dist)

# plot the eigenvalues
barplot(PCOA$values$Relative_eig[1:10])

biplot.pcoa(PCOA)

biplot.pcoa(PCOA , b)

Code 13-10

m <- matrix(ncol=5,nrow=5)
m[lower.tri(m)] <- c(9,3,6,11,7,5,10,9,2,8)
diag(m) <- 0
d <- as.dist(m, diag = TRUE)
cl <- hclust(d, method = "average")
plot(as.dendrogram(cl))

Code 13-11

a <- read.table(text = "
10 5
20 20
30 10
30 15
5 10")
d <- dist(a, method = "euclidean")
d
library(factoextra)
dend1 <- hcut(d, k=2, hc_method = "centroid")
dend2 <- hcut(d, k=2, hc_method = "ward.D2")
par(mfrow = c(1,2))
plot(as.dendrogram(dend1))
plot(as.dendrogram(dend2))

Code 13-12

d <- read.table(text = "
    place   TN NN   PL  FLW   FLL  SPS   SL
HawramaP1 10.4  3 28.5 10.1 20.50 11.6  7.2
IG12638P2 16.2  3 29.1 15.3 29.90 23.9  9.9
IG12768P2 15.4  4 25.8 11.9 25.20 25.3  9.6
IG12769P1 24.9  4 16.0 14.6 26.30 22.6  9.4
IG88725P1 17.9  4 26.0 11.3 21.70 19.4  7.3
IG88732P1 16.7  4 15.3 17.1 27.50 26.0  7.4
IG88751P1 16.7  4 19.3 12.9 26.20 21.4  8.2
IG88753P2  9.8  4 25.3 13.0 25.50 23.3  8.7
IG88882P2 20.1  3 40.2 16.7 22.20 21.0  9.6
KalakanP3  8.5  3 23.6  6.9 20.00 19.3  7.1
 MamokhP3 20.0  4 25.6  8.8 20.30 18.3  7.2
SeydsalP3 12.4  3 36.0  9.0 16.50 10.3  6.1
TarkhanP4 16.2  4 24.9  7.1 18.25 17.2  6.6
TazabadP4 13.4  3 26.8 13.7 22.35 20.0 11.9
TazabadP4 14.7  4 24.8  7.0 17.80 20.5  7.3
TirgaraP4 18.4  4 19.2  6.3 19.70 19.4  6.9
ArandanP4 12.2  4 23.2  5.6 17.30 17.1  6.4
BainjubP4 12.7  3 23.2  6.7 23.30 19.5  7.5
 ChatanP4 12.4  4 23.1 11.4 21.70 17.8  7.7
HanaglaP4  9.8  4 19.4  7.1 23.40 16.7  6.4
Ac49656P4 14.6  3 40.9 12.2 22.10 29.6 13.3
aI49657P4 20.7  5 15.0 11.7 24.90 24.9  6.6
aI49659P4 18.1  4 18.7 15.8 25.30 24.4  8.4
aI49660P4 14.0  5 23.8 18.6 28.10 27.8  9.1
aI49661P4 19.4  3 21.9  9.8 18.60 15.2  6.8
aI49662P4 24.8  4 31.6 13.4 21.20 29.5 13.6
aI49663P4 18.3  3 37.4 13.0 20.90 27.7 11.9
aI49664P4  9.9  4 24.2 19.6 24.70 28.9 11.3
aI49665P4 12.4  3 27.6 12.9 22.40 25.1 11.1
aI49666P5  8.7  3 32.1 13.0 20.00 26.8 11.7
IG19200P5  8.7  3 18.9  9.4 21.90 15.0  6.7", header = T)

d
library(factoextra)
dend <- hcut(d[,-1], k = 4, hc_metric = "euclidean",
             hc_method = "complete")
library(dendextend)
dend <- set(as.dendrogram(dend), "labels_cex", 0.8)
cols_branches <- c("red","green","blue","brown","black")
dend <- color_branches(dend , k=5, col = cols_branches)
plot(dend)

ann <- data.frame(place =(d[,1]))
rownames(ann) <- rownames(d)
labels_colors(dend) <- as.numeric(
  as.factor(ann[,1])[ order.dendrogram(dend )])
labels(dend) <- as.character(
  rownames(ann ))[ order.dendrogram(dend)]
dend %>% color_branches(dend , k=5,
                        groupLabels = T) %>% plot(horiz = F)

library(circlize)
circlize_dendrogram(dend , dend_track_height = 0.2,
                    groupLabels = T)

Code 13-13

d0 <- read.table(text = "
    place   TN NN   PL  FLW   FLL  SPS   SL
HawramaP1 10.4  3 28.5 10.1 20.50 11.6  7.2
IG12638P2 16.2  3 29.1 15.3 29.90 23.9  9.9
IG12768P2 15.4  4 25.8 11.9 25.20 25.3  9.6
IG12769P1 24.9  4 16.0 14.6 26.30 22.6  9.4
IG88725P1 17.9  4 26.0 11.3 21.70 19.4  7.3
IG88732P1 16.7  4 15.3 17.1 27.50 26.0  7.4
IG88751P1 16.7  4 19.3 12.9 26.20 21.4  8.2
IG88753P2  9.8  4 25.3 13.0 25.50 23.3  8.7
IG88882P2 20.1  3 40.2 16.7 22.20 21.0  9.6
KalakanP3  8.5  3 23.6  6.9 20.00 19.3  7.1
 MamokhP3 20.0  4 25.6  8.8 20.30 18.3  7.2
SeydsalP3 12.4  3 36.0  9.0 16.50 10.3  6.1
TarkhanP4 16.2  4 24.9  7.1 18.25 17.2  6.6
TazabadP4 13.4  3 26.8 13.7 22.35 20.0 11.9
Tazaba2P4 14.7  4 24.8  7.0 17.80 20.5  7.3
TirgaraP4 18.4  4 19.2  6.3 19.70 19.4  6.9
ArandanP4 12.2  4 23.2  5.6 17.30 17.1  6.4
BainjubP4 12.7  3 23.2  6.7 23.30 19.5  7.5
 ChatanP4 12.4  4 23.1 11.4 21.70 17.8  7.7
HanaglaP4  9.8  4 19.4  7.1 23.40 16.7  6.4
Ac49656P4 14.6  3 40.9 12.2 22.10 29.6 13.3
aI49657P4 20.7  5 15.0 11.7 24.90 24.9  6.6
aI49659P4 18.1  4 18.7 15.8 25.30 24.4  8.4
aI49660P4 14.0  5 23.8 18.6 28.10 27.8  9.1
aI49661P4 19.4  3 21.9  9.8 18.60 15.2  6.8
aI49662P4 24.8  4 31.6 13.4 21.20 29.5 13.6
aI49663P4 18.3  3 37.4 13.0 20.90 27.7 11.9
aI49664P4  9.9  4 24.2 19.6 24.70 28.9 11.3
aI49665P4 12.4  3 27.6 12.9 22.40 25.1 11.1
aI49666P5  8.7  3 32.1 13.0 20.00 26.8 11.7
IG19200P5  8.7  3 18.9  9.4 21.90 15.0  6.7", header = T)

d <- d0[-1]
k1 <- kmeans(d, centers = 3)
k2 <- kmeans(d, centers = 4)

plot(d[ ,c(3, 4)], xlab = "Pedancle length",
     ylab = "Flag leaf width", pch = 16, col =k2$cluster, cex = 2)
text(d[ ,3], d[ ,4], labels = d0$place, pos = 3)

library(ggplot2)
ggplot(d0, aes(PL, FLW, color = factor(k1$cluster), label = place)) +
  geom_point () +
  geom_text()

library(factoextra)
rownames(d) <- d0$place
p1 <- fviz_cluster(k1, data = d,repel = T,ellipse = T) + ggtitle("k = 3")
p2 <- fviz_cluster(k2, data = d,repel = T,ellipse = T) + ggtitle("k = 4")
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)

fviz_cluster(k1, data = d, repel = T, ellipse = T)

Code P331

d <- read.table(text = "
7.151  8.322 12.006 2.195 1.162
7.453 10.166 12.200 2.272 1.734
8.618 10.592 13.577 4.592 2.552
5.976  9.425 10.802 2.213 0.906
6.841 10.324 12.366 2.915 1.995
5.961 11.171 11.049 3.813 3.651")

factor <- factanal(d, factors = 2, scores
                   = "regression",rotation = "varimax")
scores <- factor$scores
scores <- round(data.frame(scores), digits = 3)
scores

loadings <- subset(factor$loadings[1:5 ,])
loadings <- round(loadings , digits = 3)
loadings <- data.frame(loadings)
loadings
library(ggrepel)
library(ggplot2)
p1 <- ggplot(loadings , aes(Factor1, Factor2)) +
  geom_point(size =3) +
  geom_text_repel(label=rownames(loadings), colour
                  = "gray", size = 4, box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines")) +
  theme_bw()
p2 <- ggplot(scores , aes(Factor1, Factor2)) +
  geom_point(size = 3, color="brown") +
  geom_text_repel(label = rownames(scores), color
                  = "gray2", size = 4, box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines")) + theme_bw()
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)

Code P333

train <- read.table(text = "
g x1  x2
1 -2  5
1  0  3
1 -1  1
2  0  6
2  2  4
2  1  2
3  1 -2
3  0  0
3 -1 -4", header = T)

library(MASS)
model <- lda(train$g ~., train , prior = c(1,1,1)/3)
model

predict <- predict(model, train)
predict

# Graph
attributes(model)
ldahist(data = predict$x[,1], g = train$g)

plot(model)

# Dotplot
test <- data.frame(g = 2, x1 = 1, x2 = 3)
predict2 <- predict(model , test)
names(predict2)
predict2$class
table(predict2$class , test$g)
cat("LDA accuracy is:",
sum(predict2$class == test$g)/ length(test$g))

# :رسم نمودار نقطەای اعضا براساس توابع تشخیصاول و دوم  
library(ggplot2)
library(ggrepel)

lda.data <- cbind(train , predict(model )$x)
lda.data$g <- as.factor(lda.data$g)
ggplot(lda.data , aes(LD1, LD2)) +
geom_point(aes(color = g), size = 3) +
geom_text_repel(label=rownames(lda.data),
colour = "black", size = 4, box.padding
= unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) + theme_bw()

Code P337

a <- read.table(text = "
   rep cr  PWWl DWWl GPl  GSl   GHl  PWWf DWWf GPf   GSf GHf
  1 So 27.71 2.79  87 0.02 65.05  8.92 1.32  75 0.006 144
  2 So 26.88 2.50  77 0.02 43.92  4.84 1.28  50 0.005  43
  3 So 27.50 2.66  56 0.03 51.20  8.14 1.38  79 0.028 151
  4 So 23.15 2.71  91 0.02 54.79 10.53 1.57  63 0.009 130
  1 Co 12.82 1.32 100 0.03 40.04  3.57 0.59  92 0.006 162
  2 Co 12.17 1.22  85 0.04 39.46  4.16 0.83  80 0.016 146
  3 Co 14.49 1.28  90 0.06 44.63  4.26 0.69  55 0.015 136
  4 Co 13.18 1.18  95 0.03 51.55  4.59 0.72  72 0.020 151
  1 Sa  6.22 0.43  91 0.02 43.85  7.89 0.99  99 0.007 133
  2 Sa  5.50 0.40  77 0.02 47.49  5.55 0.80  66 0.041 153
  3 Sa  5.55 0.40  47 0.03 43.80  4.68 0.53  55 0.014 132
  4 Sa  5.86 0.49  86 0.02 51.44  8.25 0.91  71 0.049 127
  1 Ba  4.37 0.47  90 0.02 46.67  9.11 1.47  78 0.007 153
  2 Ba  4.69 0.47  89 0.02 43.09  6.46 1.06  79 0.023 147
  3 Ba  4.71 0.46  54 0.03 36.00  4.57 0.75  82 0.022 131
  4 Ba  4.10 0.41  95 0.02 40.32  2.61 0.49  74 0.034 143
  1 Ra  0.80 0.03  84 0.03 52.00  2.63 0.38  82 0.006 154
  2 Ra  0.93 0.04  85 0.02 53.17  1.81 0.28  51 0.005  33
  3 Ra  0.79 0.04  50 0.02 58.53  2.46 0.33  88 0.008 135
  4 Ra  0.74 0.04  86 0.02 45.24  1.77 0.24  77 0.009 144
  1 Su  8.73 0.74 100 0.03 31.53  8.32 0.86  67 0.006  86
  2 Su  9.13 0.82  89 0.03 32.95 12.43 1.29  54 0.005  26
  3 Su  8.21 0.68  94 0.05 53.71  7.19 0.77  61 0.052 103
  4 Su  8.41 0.77  96 0.03 39.10  7.81 0.75  72 0.023 118
  1 Wh  4.03 0.43 100 0.03 21.82  5.05 0.93  75 0.005 153
  2 Wh  3.81 0.41  97 0.03 65.13  3.27 0.71  42 0.005  58
  3 Wh  3.47 0.33  97 0.03 41.83  5.19 0.90  61 0.044 156
  4 Wh  4.39 0.48  97 0.02 24.50  3.62 0.71  79 0.014 173", header = T)

cancor(a[,3:7], a[,8:12])

Code P340

d <- read.table(text = "
Gen  Fl Chl Stem Height PodN PodL Yield   HI Thaus SeedP
  1 204  44  8.6   92.3 32.6  5.3  21.6 35.6   3.3  14.7
  2 202  43  8.4   91.1 28.3  5.9  14.3 24.8   3.8  11.4
  3 189  34 10.6   81.3 20.4  5.1  15.6 32.3   2.9   8.7
  4 205  42  8.5   93.7 27.8  5.6  21.9 37.6   4.9  13.8
  5 185  36 13.4   86.4 19.3  5.5  36.6 38.9   3.9  11.5
  6 190  31 11.3   75.1 17.7  5.4  14.5 32.9   3.1   8.4
  7 207  45  6.7   87.5 22.9  5.2  11.7 23.9   4.4   9.0
  8 206  45  7.7   90.2 26.0  6.0  13.3 21.2   4.3   8.8
  9 206  44  6.7   89.7 22.0  5.6  14.8 31.9   3.1  14.2
 10 185  34 10.6   89.3 20.4  5.1  15.6 32.3   2.9   8.7
 11 206  42  8.5   92.7 27.8  5.6  21.9 37.6   4.9  13.8", header = T)
d
factor <- factanal(d[-1], factors = 2, scores =
                     "regression", rotation = "varimax")
scores <- factor$scores
scores <- round(data.frame(scores), digits = 3); scores
loadings <- subset(factor$loadings[1:10 ,])
loadings <- round(loadings , digits = 3)
loadings <- data.frame(loadings)
loadings

library(ggrepel)
library(ggplot2)
p1 <- ggplot(loadings , aes(Factor1, Factor2)) +
  geom_point(size =3, color="red") +
  geom_text_repel(label=rownames(loadings),
                  colour = "blue", size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines")) +
  theme_bw()
p2 <- ggplot(scores , aes(Factor1, Factor2)) +
  geom_point(size = 3, color="red") +
  geom_text_repel(label = rownames(scores),
                  color = "blue", size = 4,
                  box.padding = unit(0.35, "lines"),
                  point.padding = unit(0.3, "lines")) + theme_bw()
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)

Code P341

library(MASS)
train <- read.table(text = "
BCHR  A  B  C   D E  F  G  H
   a 12 23  4  53 2 12 10  5
   a  8 23 10  90 3 22 27 12
   a 16 24  8  95 1 27 17  8
   a 11 22  3  36 1  5 52  3
   a 17 23 14  86 2 39 24  8
   a  9 26 14  45 1 22 25  9
   a 15 22  7  78 2 17 11  5
   a  9 20  9  97 3 12 13  6
   b 18 19 12  80 3 20 11  6
   b 10 23 10 121 2 23 22 10
   b 16 21  7 143 2 26 16  8
   b 17 22  9  75 2 13 10  6
   b 20 21 17 199 1 50 26 15
   b 16 22 14  99 2 42 25 12
   b 23 21  7 164 1 24 11  7
   b 30 20 11 140 2 25 14  9", header = T)

test <- read.table(text = "
BCHR A  B C  D  E  F  G H
   a 7 18 8 90  2 30 50 6", header = T)

#model construction and prediction
model.1 <- lda(train$BCHR ~., train , prior = c(1,1)/2)
#model.2 <- qda(train$BCHR ~., train , prior = c(1,1)/2)

model.1
predict <- predict(model.1, test)
names(predict)
predict$class
table(predict$class , test$BCHR)

#accuracy
cat("LDA accuracy is:",
    sum(predict$class == test$BCHR)/ length(test$BCHR))
attributes(model.1)
p <- predict(model.1, train)
ldahist(data = p$x[,1], g = train$BCHR)

Chapter 14

Code P345

65 + 32
3 * 1.7 - 2
4 * 3 + 6/0.2
5^2
2 > 1
2 >= 1
4 != 4
14 %/% 3
14 %% 3
3 == 3 & 4 != 4
3 == 3 && 4 != 4
is.na(6)
!is.na(6)

# برای مشاهده‌ی راهنما در مورد یک کاراکتر خاص باید علامت سوال و به دنبال آن کرکتر را درون علامت نقل قول تایپ و اجرا کرد
?'%%'
?'%/%'
?"!"

Code P346

a <- 3
b <- "plant" 
1+2
a**3 
a^3 
x <- c(1,3,5) 
log(2)
log10(2)  
sqrt(16)
abs(2.25) 
round(2.227, digits = 2)
?round

Code P347

v1 <- c(1, 2, 3)
v2 <- c("A", "B", "C")
(c(v1, v2))
data.frame(v1, v2)
1:10
letters[1:5]
LETTERS[1:5]
which(c(6,2,1,3,4,2,1) > 3)
x <- seq(5, 15, 3)
x

rep(1:4, 3)
rep(1:4, each=3)
rep(1:4, each=2, times=2)

s <- c('male','male','female','male','female')
s == 'male'

Code P349

x <- matrix(c(1,3,2,1,-1,-2,1,1,1),3,3)
x
solve(x)
round(solve(x) %*%x, 3)

a1 <- sample(1:20, size = 9, replace = FALSE)
a1

a2 <- matrix(a1, nrow = 3)
a2

row.names(a2) <- seq(1:3)
colnames(a2) <- paste0("col", seq(1:3))
head(a2)

col4 <- c(rep("C1", 2),rep("C2", 1)) # ایجاد برداری با سه عنصر 
b <- data.frame(a2, col4) # افزودن ستون چهارم به
head(b)

Code P350

a <- matrix (1:10 , 2)
b <- as.vector(a)
b

a <- matrix(1:16, 4)
b <- matrix(1:16, 4)
c <- a + b
c <- as.data.frame(c)
c

# برای محاسبه‌ی جذر ماتریس می‌توان از بسته اگزم به‌صورت زیر استفاده کرد:
library(expm)
sqrtm(b)

Code P351

a <- list(x1 = 'red', x2 = 'green', x3 = c(21,32,11))
a
a[1]
a[3]
a$x3


# آرایه در اصل یک بردار چندبعدی و متشکل از داده‌های هم‌نوع است. دسترسی به عناصر مختلف یک آرایه از طریق کروشه ساده [] امکان‌پذیر است که در آن اولین و دومین عنصر شماره ردیف و ستون و سایر عناصر، ابعاد خارجی‌تر را نشان می‌دهند. تفاوت اصلی آرایه با ماترکس این است که ماتریس به دو بعد محدود است ولی آرایه می‌تواند چندین بعد داشته باشد.

theArray <- array(1:12, dim=c(2, 3, 2))
theArray

theArray[1, , ]

theArray[1, , 1]

theArray[, , 1]

Page 352

.Library        # مشاهده مسیر معمول نصب بسته‌ها  
.libPaths()     # مشاهده مسیرهای حاضر و معمول نصب بسته‌ها  

# دستورات زیر بسته‌های وابسته به یک بسته مورد نظر (برای مثال دی سک تو) را نشان می‌دهد:

if (!require("pacman")) install.packages("pacman")
pacman::p_depends(DESeq2, local = TRUE)

# برای ارجاع به آر یا یک بسته خاص (برای مثال دی سک تو) دستورات زیر را اجرا کنید:
citation()
citation("DESeq2")

# library()         # مشاهده بسته‌های نصب شده         
library("MASS")   # فراخوانی یک بسته

Page 353-354

# Installing from Github
library("devtools")
#install_github("yanlinlin82/ggvenn")

# Help of a package
# library(help = e1071)       

# Data sets of a package
# data(package = "datasets")       

class(iris)        # نمایش نوع داده‌ها
dim(iris)          # نمایش تعداد ردیف و ستون داده‌ها 
head(iris)         # نمایش شش ردیف اول داده‌ها
tail(iris)         # نمایش شش ردیف آخر داده‌ها  
str(iris)          # نمایش ساختار داده
summary(iris)      # نمایش چندک‌ها برای ستون‌های مختلف 
boxplot(iris[,1])  # نمایش نمودار جعبه‌ای ستون

hist(iris[,1])     # رسم هیستوگرام ستون اول

Page 355-357

# saving image directly in the computer
par(mfrow = c(2,2))
boxplot(iris[,1], col = "cyan")
boxplot(iris[,2], col = "green")
boxplot(iris[,3], col = "magenta")
boxplot(iris[,4], col = "gold")

dev.off()

# استخراج زیرمجموعه‌ای از جدول
x <- data.frame(col1 =c(1,3,2), col2 =c(1,-2,1), col3 =c(1,1,-1))
row.names(x) <- paste0("row", 21:23)
# نمایش دادن ردیف row23 
x[3,]
x[c("row21", "row23"),]
x[,3]
# نمایش ستون‌های خاصی به همان شکل ستونی 
x[,3, drop = F]
# مشخص کردن ردیف‌هایی که مقدار ستون اول آن‌ها بزرگ‌تر یا مساوی 2 است. 
x[,1] >= 2
# نمایش ردیف‌هایی که مقدار ستون یک آن‌ها بزرگ‌تر یا مساوی 2 است 
x[x[,1] >= 2,]

# نمایش شماره ردیف‌هایی که مقدار ستون یک آن‌ها بزرگ‌تر یا مساوی 2 است 
which(x[,1] >= 2)
# نمایش ردیف‌هایی که جمع آن‌ها بزرگ‌تر یا مساوی 3  است. 
x[rowSums(x) >= 3,]
# نمایش شماره ردیفی که 3 را دارد. 
grep("3", row.names(x))
# در مورد ساختار داده، می‌توان از علامت دلار هم برای مشخص کردن ستون‌ها استفاده کرد.
x[x$col1 >= 2,]

# Filtering using the subset function
subset(x, col1 >=2 & col3 < 0)

# Filtering based on a part of the word
library(data.table)
x[rownames(x)  %like%  "3" | rownames(x)  %like%  "2",]


library(data.table)
q <- c(2,3)
x[x$col1 %in% q,]

# توابع rowSums  و  colSums 
x <- matrix(c(0,0,0,0,0,0,2,3,4,5,6,9,1,2,3,0),4, byrow = T)
x

# استخراج ردیف‌های دارای جمع بزرگ‌تر از صفر 
x[rowSums(x) > 0,]

rowMeans(x)

# استخراج ردیف‌هایی که حداقل دو عضو بزرگ‌تر از صفر در آن‌ها وجو دارد 
x[rowSums(x > 0) > 2,]

Page 358

# setwd("C:/Users/user/Desktop/")

# data <- read.csv("C:/Users/user/Desktop/Data.csv", header = TRUE)

# data <- read.table("Data.csv", sep=";", header = TRUE)

library(openxlsx)
# a <- read.xlsx("E:/data.xlsx",2,colIndex = c(1:4), header=T)

# write.table(b, "outputname.txt", quote=F, se = "\t", na="NA")

Page 359 (Description statistics)

a <- matrix (c(5,4,3,6,7,1,2,3,2,8,7,3,9,4,8,1), 4) 
a

prod(a[,1])       # محاسبه حاصل‌ضرب همه مقادیر  
rank(a)           # محاسبه‌ی رتبه مقادیر یک بردار  
colnames(a)       # مشاهده یا تعریف نام ستون‌های یک ماتریس  
sum(a)            # جمع مقادیر  
sum(a[,3])
sd(a)             # محاسبه انحراف معیار  
sd(a[,4])              
sd(a, na.rm = T)  # حذف مقادیر گم‌شده قبل از محاسبه  
x <- c(2,5,NA,3,7) 
is.na(x)          # آیا ان ای در بردار ایکس وجود دارد؟  
any(is.na(x))
which(is.na(x))   # کدام عدد ان ای است؟ عدد سوم!  
which(x == 7)     # کدام عدد در بردار ایکس برابر با 7 است؟ عدد پنجم!

Page 360-364 (apply functions)

a <- matrix (c(1,1,2,2,7,1,2,3,2,8,7,3,9,4,8,1), 4)
a

apply(a, 2, sum,  na.rm = TRUE) # عدد 2 ستون‌ها و عدد 1 ردیف‌ها را در تابع اپلای مشخص می‌کند!  

apply(a[,3:4], 2, sum,  na.rm = TRUE)   

apply(a[,3:4], 2, mean, na.rm = TRUE)

m <- matrix(c(1:10),nrow = 5, ncol = 6)
sq = function(x) x^2
apply(m, c(1,2), sq)

m <- matrix(c(1:10),nrow = 5, ncol = 6)
n <- lapply(m, mean)
n2 <- unlist(n)
n2

d <- as.data.frame(m)
nd <- lapply(d, mean)
nd2 <- unlist(nd)
nd2

sapply(iris[,1:3], mean) # تابع سپلای برای ساختار داده یا لیست به کار می‌رود.  

b <- as.data.frame(a) # در کل از هر تابعی می‌توان در درون توابع اپلای استفاده کرد.   
sapply(b[3:4], function(x) x=x*x) 

a <- matrix(1:21, 7)
a <- as.data.frame(a)

inf <- function(x) list(mean = mean(x),
                        se = sd(x)/sqrt(length(x)), length = length(x))
sapply(a, inf)

t(sapply(a, inf))


par(bg = NA)                                                
par(mfrow=c(2,2)) 
par(mar = c(2, 4.5, 1.8, 1))    
hist(a[,1], breaks = 12, main = NULL, xlab = NULL,
     col = 'pink')
hist(a[,2], breaks = 12, main = NULL, xlab = NULL,
     col = 'pink')
hist(b[,1], breaks = 12, main = NULL, xlab = NULL,
     col = 'lightgreen')
hist(b[,2], breaks = 12, main = NULL, xlab = NULL,
     col = 'lightgreen')

tapply(iris$Sepal.Length, iris$Species, mean) # محاسبه میانگین طول سپال برای گونه‌های مختلف   

# محاسبه‌ی میانگین، انحراف معیار، خطای معیار ستون‌های جدول با استفاده از تابع اپلای:

# df <- read.table("1.txt",header=T)
# df

# mean <- apply(df[4:7], 2, FUN = mean, na.rm = T)
# mean

# cbind(mean)

# sd <- sapply(df[4:7], FUN = sd, na.rm = T)
# n <- sapply(df[4:7], FUN = length)
# cbind (mean , sd, n)

library(dplyr)
b <- summarise(group_by(iris, Species),
               mPW = mean(Petal.Width), mSW = mean(Sepal.Width),
               varPW = var(Petal.Width), varSW = var(Sepal.Width))
b

Page 364-365

# مصورسازی در R
# ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) 
# +  <GEOM_FUNCTION>()

# a <- read.csv(file = url("http://r-bio.github.io/data/surveys_complete.csv"))

# group_by and summarise
library(dplyr)
b <- summarise(group_by(iris, Species), 
               m =  mean(Petal.Width),
               var = var(Petal.Width))
b
# b$species <- as.factor(b$Species)

# library(ggplot2)
# ggplot(b) + 
#  geom_col(aes(Species, m, fill = species)) +
#  geom_errorbar(aes(Species, ymin= m-(var/10), ymax=m+(var/10)), width = 0.2)

Page 366

# برنامه‌نویسی  
y <- c()
for (i in 1:5){
  y[i] = i^2
}
y


for (i in 1:5) {
  i = i^2; print(i)
}

# نکته: می‌توان یک ساختار کنترلی مثلاً شرط یا شرایطی را قبل از عملکرد مورد نظر اضافه نمود:

for (i in 1:5) if(i > 3){
  i = i**2; print(i)
}

# مثال:
x <- c(2,4,6,8,10)
y <- c()
for (i in 1:5) {
  y[i] = x[i]^2
}
y

# مثال:
a <- 1:10
b <- c()
for (i in 1:length(a)){
  ifelse(i < 6, b[i] <- a[i]^2, b[i] <- a[i]^4)
}
b

# مثال: رساندن اعداد یک جدول به توان سه:

a <- matrix(1:9, 3, 3)
b <- matrix(0, 3, 3)
for(i in 1:ncol(a)){
  {
    for (j in 1:nrow(a)) {
      b[i,j] <- a[i,j]^3
    }
  }
}
b

# مثال: ایجاد جدول ضرب:

a <- 1:9
b <- matrix(0, 9, 9)
for(i in 1:9){
  {
    for (j in 1:9) {
      b[i,j] <- a[i]*a[j]
    }
  }
}
b

a <- c("ali", "puya", "saeed", "karim", "jamal")
b <- c(18, 15, 14, 19, 17)
for (i in 1:5) {cat(a[i],"got", b[i], "in Statistics", "\n")}


x = 2
while (x < 100) {x = x^2; print(x)}

# if and else
if (1 == 1) {
  print(1)
} else {
  print("NotCorrect")
}

# using ifelse
a <- 1:10
ifelse(a%%2 == 0, 'even', 'odd')

# repeat
x <- 10
repeat{x <- x/2; print(x); if(x < 1) break()}

# Writing a function
fact <- function(n){
  a = 1
  for(i in 1:n)
    a = a*i
  a
}
fact(5)


Number <- function(x){
  if(x  == 1) 
    "One"
  if(x  == 2) 
    "Two" 
  if(x  == 3) 
    "Three"
  else("NotFound")
}

# ifelse
med <- function(x) {
  sort(x)
  ifelse(length(x)%%2 == 0, (x[(length(x)/2)] +  x[(length(x)/2)+1])/2,
         x[((length(x)-1)/2)+1])
}

med(c(5,7,9,12,15,3))

# converting DNA to RNA
dna2rna <- function(x){
  if(!is.character(x))
    stop("needs character sequence")
  m <- toupper(x)
  chartr("T", "U", m)
}
a <- "ttagcggcgaatagcatcgcgaatccctagc" 
dna2rna(a)

# مثال: تابعی که تابع خاص دیگری را بر ستون‌های عددی  در یک ساختار داده اعمال  و از ستون‌های حرفی صرف‌‌‌نظر می‌کند:

meanDf <- function(x){
  b <- c()
  for(i in 1:ncol(x))
    if(class(x[,i]) != 'character')
      b[i] <- i
  b <- na.omit(b)
  sapply(x[,b], mean)
}
df <- data.frame(a = 1:5, b = letters[1:5], c = 5)
meanDf(df)

# ایجاد محدودیت برای آرگومان‌های یک تابع. 

agec <- function(age, gender){
  if(class(age) != "numeric") 
    stop("inter your age in the first argument!")
  if(gender != "male" & gender != "female")
    stop("gender should be male or female!")
  
  if (age > 12)
    if (age < 20)
      if (gender == "male")
        print("You are a teenage boy.")
  else
    print("You are a teenage girl.")
  else
    print("You are already an adult.")
  else
    print("You are still too young.")
}

d <- read.table(text = "
   group  var1  var2 var3
early.v1 10.59 10.48 4.73
early.v2 11.05  9.02 5.33
early.v3  9.94 10.77 3.46
early.v4  7.99 15.56 5.15
early.v5  9.08 14.76 5.46
late.v1   9.38 14.41 1.46
late.v2   9.52 12.51 3.00
late.v3  10.81 11.12 5.19
late.v4  10.68  9.76 2.25
late.v5  10.95 11.61 3.97", header = TRUE)

Applied Statistics and Probabilities

Ghader Mirzaghaderi

2025-01-22

Chapter 1

Code 1-1

Code 1-2

Code 1-3

Code 1-4

Code 1-5

Code 1-6

Chapter 2

Code P38

Code P39

Code Code 1-1

Chapter 3

Example 3-8

Chapter 4

Code 4-1

Code 4-2

Code 4-3

Chapter 6

Code 6-1

Code 6-2

Code 6-3

Code 6-4

Chapter 7

Code 7-1

Code 7-1

Code 7-2

Code 7-3

Code 7-4

Code 7-5

Code 7-6

Code 7-7

Chapter 8

Code 8-1

Code 8-2

Code 8-3

Code 8-4

Chapter 9

Code 9-1

Code 9-2

Code 9-3

Code 9-3 (Part 2)

Code 9-4

Code 9-5

Chapter 10

Code 10-1

Code 10-2

Code P201

Code 10-3

Code 10-4

Chapter 10

Code 11-1

Code 11-2

Code 11-3

Code 11-4

Code 11-5

Chapter 12

Code 12-1

Code P256

Code P257

Code P259

Code P263

Code P265

Code P266

Chapter 13

Code 13-1

Code 13-2

Code P278

Code 13-3

Code 13-4

Code 13-5

Code P285

Code P288 - 1

Code P288 - 2

Code P289

Code P293

Code 13-6

Code 13-7