R code
x <- c(23.5, 19.6, 18.4, 18.2, 22.5, 13.3, 23.5, 20.5,
23.2, 19.5, 27.2, 22.5, 22.9, 21.8, 20.4, 17.5,
24.3, 22.2, 15.5, 16.7)
stem(x, scale = 3, width = 200, atom = 1e-08)
##
## The decimal point is at the |
##
## 13 | 3
## 14 |
## 15 | 5
## 16 | 7
## 17 | 5
## 18 | 24
## 19 | 56
## 20 | 45
## 21 | 8
## 22 | 2559
## 23 | 255
## 24 | 3
## 25 |
## 26 |
## 27 | 2
library(UsingR)
a <- c(19.5,23.2, 20.5, 23.5, 13.3,22.5,18.2,18.4,19.6,
23.5,16.7,15.5,22.2,24.3,17.5,20.4,21.8,22.9,22.5,27.2)
par(mar = c(2, 1.5,0, 1))
DOTplot(a)
set.seed(15051)
x <- round(runif(1000, 0, 100))
head(x)
## [1] 73 44 4 2 3 78
quantile(x, na.rm = TRUE)
## 0% 25% 50% 75% 100%
## 0 23 50 75 100
quantile(x, probs = 0.5)
## 50%
## 50
quantile(x, probs = seq(0, 1, 1/3))
## 0% 33.33333% 66.66667% 100%
## 0 34 68 100
quantile(x, probs = seq(0, 1, 1/4))
## 0% 25% 50% 75% 100%
## 0 23 50 75 100
boxplot(Sepal.Length ~ Species, outpch = NA,
data=iris, col='lightyellow')
x1 <- c(9, 5, 2, 7, 3, 6, 4, 5)
w1 <- c(2, 3, 1, 5, 7, 1, 3, 7)
weighted.mean(x1, w1)
## [1] 4.965517
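# A quick check (added sketch): the weighted mean is sum(w*x)/sum(w)
sum(x1 * w1) / sum(w1)
## [1] 4.965517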
x <- c(8, 9, 4, 1, 6, 4, 6, 2, 5)
exp(mean(log(x)))
## [1] 4.209156
# install.packages("psych")
library("psych")
geometric.mean(x)
## [1] 4.209156
x <- c(8, 9, 4, 1, 6, 4, 6, 2, 5)
harmonic.mean(x)
## [1] 3.249749
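# As a check (added sketch): the harmonic mean is the reciprocal of the
# arithmetic mean of the reciprocals
1 / mean(1 / x)
## [1] 3.249749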
set.seed(213)
x_norm <- rnorm(5000)
hist(x_norm, prob = TRUE)
lines(density(x_norm), col = 2, lwd = 3)
# install.packages("e1071")
library(e1071)
# library(help=e1071)
skewness(x_norm)
## [1] 0.01792106
kurtosis(x_norm)
## [1] 0.1117041
x <- c(2, 7, 7, 4, 5, 1, 3)
var(x)
## [1] 5.47619
var_pop <- function(x) {
mean((x - mean(x))^2)
}
var_pop(x)
## [1] 4.693878
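# Equivalently (added sketch): the population variance is the sample
# variance rescaled by (n - 1)/n
var(x) * (length(x) - 1) / length(x)
## [1] 4.693878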
sd(x)
## [1] 2.340126
factorial(4)
## [1] 24
perm <- function(n,k){choose(n,k) * factorial(k)}
perm(5,3)
## [1] 60
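# The same count (added sketch) via the factorial identity n!/(n-k)!
factorial(5) / factorial(5 - 3)
## [1] 60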
library(ggvenn)
library(tibble) # needed for tibble() below
d <- tibble(value = c(1, 2, 3, 5, 6, 7, 8, 9),
`Set 1` = c(T, F, T, T, F, T, F, T),
`Set 2` = c(T, F, F, T, F, F, F, T),
`Set 3` = c(T, T, F, F, F, F, T, T),
`Set 4` = c(F, F, F, F, T, T, F, F))
ggvenn(d, c("Set 1", "Set 2", "Set 3", "Set 4"))
x <- list(
A = c(2,4,6),
B = c(1,2,3),
C = c(2,3,4,5,6))
ggvenn(x,
fill_color = c("#0073C2FF", "#EFC000FF", "red"))
#stroke_size = 0.5, set_name_size = 3
dpois(c(0,3), lambda=4)
## [1] 0.01831564 0.19536681
pnorm(1.96)
## [1] 0.9750021
pnorm(1.96, lower.tail = FALSE)
## [1] 0.0249979
qnorm(0.05)
## [1] -1.644854
qnorm(0.975)
## [1] 1.959964
alpha <- c(0.1, 0.05, 0.01, 0.005, 0.001)
qnorm(1 - alpha)
## [1] 1.281552 1.644854 2.326348 2.575829 3.090232
pnorm(1.5) - pnorm(-1)
## [1] 0.7745375
pnorm(13, 12, 3)
## [1] 0.6305587
pchisq(3.84, df = 1)
## [1] 0.9499565
pf(4.43, 3, 5)
## [1] 0.9286377
1 - pf(4.43, 3, 5)
## [1] 0.07136232
pt(2.1, 15)
## [1] 0.9734724
1 - pt(2.1, 15)
## [1] 0.02652763
alpha <- c(0.1, 0.05, 0.01, 0.005, 0.001)
qnorm(1 - alpha)
## [1] 1.281552 1.644854 2.326348 2.575829 3.090232
qt(1 - alpha, 15)
## [1] 1.340606 1.753050 2.602480 2.946713 3.732834
pnorm(1.5) - pnorm(-1)
## [1] 0.7745375
pbinom(7, 10,0.5)
## [1] 0.9453125
a <- c(17.5,17.63, 17.75, 17.86, 17.9, 17.96,
18, 18.15, 18.22, 18.25)
qqnorm(a);qqline(a, col="red")
shapiro.test(a)
##
## Shapiro-Wilk normality test
##
## data: a
## W = 0.96262, p-value = 0.8153
library(ggplot2)
library(ggpubr)
ggqqplot(a)
qnorm(0.05, 24.2, 3.33/6); qnorm(0.95, 24.2, 3.33/6)
## [1] 23.28711
## [1] 25.11289
a <- c(2.1, 2.7, 3.4, 0.8, 3.0, 1.7)
t.test(a, conf.level = 0.95)
##
## One Sample t-test
##
## data: a
## t = 5.8901, df = 5, p-value = 0.002005
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 1.286830 3.279837
## sample estimates:
## mean of x
## 2.283333
a <- c(13.9, 13.5,12.4, 12.7, 13.0, 12.8, 12.4, 13.4, 12.9)
t.test(a, conf.level = 0.95)
##
## One Sample t-test
##
## data: a
## t = 76.485, df = 8, p-value = 9.516e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 12.60805 13.39195
## sample estimates:
## mean of x
## 13
t.test(a, mu = 13.5)
##
## One Sample t-test
##
## data: a
## t = -2.9417, df = 8, p-value = 0.01866
## alternative hypothesis: true mean is not equal to 13.5
## 95 percent confidence interval:
## 12.60805 13.39195
## sample estimates:
## mean of x
## 13
prop.test(637,1000,correct=FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: 637 out of 1000, null probability 0.5
## X-squared = 75.076, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.6067244 0.6662270
## sample estimates:
## p
## 0.637
library(EnvStats)
ciTableMean(n1 = 100, diff.or.mean = 993, SD = 35,
sample.type = "one.sample", ci.type = "upper",
conf.level = 0.95, digits = 1)
## Mean=993
## SD=35 [-Inf, 998.8]
2*(1 - pnorm(1.25))
## [1] 0.2112995
a <- c(2.1, 2.7, 3.4, 0.8, 3.0, 1.7)
t.test(a, mu = 2, alternative = "greater")
##
## One Sample t-test
##
## data: a
## t = 0.73089, df = 5, p-value = 0.2488
## alternative hypothesis: true mean is greater than 2
## 95 percent confidence interval:
## 1.502186 Inf
## sample estimates:
## mean of x
## 2.283333
sigma <- 1.6
mu0 <- 1000
mu1 <- 1005
alpha <- 0.05
crit <- qnorm(1-alpha/2, mu0, sigma)
(pow <- pnorm(crit, mu1, sigma, lower.tail=FALSE))
## [1] 0.8779978
(beta <- 1-pow)
## [1] 0.1220022
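# The same power in one line (added sketch), standardizing the critical
# value under H1:
pnorm((mu0 - mu1)/sigma + qnorm(1 - alpha/2), lower.tail = FALSE)
## [1] 0.8779978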
xLims <- c(993, 1013)
left <- seq(xLims[1], crit, length.out=100)
right <- seq(crit, xLims[2], length.out=100)
yH0r <- dnorm(right, mu0, sigma)
yH0l <- dnorm(left, mu0, sigma)
yH1l <- dnorm(left, mu1, sigma)
yH1r <- dnorm(right, mu1, sigma)
curve(dnorm(x, mu0, sigma), xlim=xLims, lwd=2,
col="red", xlab="x", ylab="density", ylim=c(0, 0.27),
xaxs="i")
curve(dnorm(x, mu1, sigma), lwd=2, col="blue",
add=TRUE)
polygon(c(right, rev(right)), c(yH0r, numeric(length(right))),
border=NA, col=rgb(1, 0.3, 0.3, 0.6))
x <- seq(from = 993, to = 996.8, length.out = 100)
y <- dnorm(x, 1000, 1.6)
polygon(c(993, x, 996.8), c(0, y, 0), border=NA,
col=rgb(1, 0.3, 0.3, 0.6))
polygon(c(left, rev(left)), c(yH1l, numeric(length(left))),
border=NA, col=rgb(0.3, 0.3, 1, 0.6))
polygon(c(right, rev(right)), c(yH1r, numeric(length(right))),
border=NA, density=5, lty=2, lwd=2, angle=45, col="darkgray")
abline(v=crit, lty=1, lwd=3, col="red")
text(crit-2,0.26,adj=0,label="critical value")
text(mu0-1.2,0.2, adj=1,label="distribution under H0")
text(mu1+1.4,0.2, adj=0,label="distribution under H1")
text(crit+1.3,0.07, adj=0,label="power", cex=1.3)
text(crit-5, 0.05, expression(beta), cex=1.3)
text(crit-3, 0.05, "= 0.122", cex=1.3)
text(crit+2, 0.02, 0.025, cex=1.3)
x <- (0 : 120) / 10
plot(x, dchisq(x, 4), type = "l",
ylab = "Chi -squared density")
lines(x, dchisq(x, 5))
lines(x, dchisq(x, 6))
lines(x, dchisq(x, 7))
lines(x, dchisq(x, 8))
text(c(3.1,4.3,5.3,6.4, 8.1),c(.18 ,.153 ,.137 ,.123 ,.11),
labels = c("4 df", "5 df", "6 df", "7 df", "8 df"))
library(EnvStats)
x <- c(140, 210, 110, 170, 90, 180, 250, 200, 125, 75)
varTest(x = x, alternative = "two.sided",
sigma.squared = 10000, conf.level = 0.95)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: variance = 10000
##
## Alternative Hypothesis: True variance is not equal to 10000
##
## Test Name: Chi-Squared Test on Variance
##
## Estimated Parameter(s): variance = 3188.889
##
## Data: x
##
## Test Statistic: Chi-Squared = 2.87
##
## Test Statistic Parameter: df = 9
##
## P-value: 0.06154686
##
## 95% Confidence Interval: LCL = 1508.718
## UCL = 10628.10
# lcl = (n - 1) * sigma^2 / qchisq(p = alpha / 2, df = df, lower.tail = FALSE)
# ucl = (n - 1) * sigma^2 / qchisq(p = alpha / 2, df = df, lower.tail = TRUE)
lcl = (9) * 3.06/ qchisq(p = 0.05 / 2, df = 9,
lower.tail = FALSE)
ucl = (9) * 3.06/ qchisq(p = 0.05 / 2, df = 9,
lower.tail = TRUE)
paste(round(lcl ,3),"-", round(ucl ,3))
## [1] "1.448 - 10.199"
chisq.test(c(115,85))
##
## Chi-squared test for given probabilities
##
## data: c(115, 85)
## X-squared = 4.5, df = 1, p-value = 0.03389
observed <- c(1178, 291, 273, 156)
expected <- c(9/16, 3/16, 3/16, 1/16)
chisq.test(observed , p = expected)
##
## Chi-squared test for given probabilities
##
## data: observed
## X-squared = 54.313, df = 3, p-value = 9.623e-12
x <- rep(0:4, times = c(2,6,10,10,7))
mean(x)
## [1] 2.4
# Probabilities for the values 0 through 3 only
probs = dpois(0:3, lambda=mean(x))
probs
## [1] 0.09071795 0.21772309 0.26126771 0.20901416
# Probability of the values 4 and greater
complement = 1-sum(probs)
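# Sanity check (added): the five cell probabilities must sum to one for
# chisq.test
sum(c(probs, complement))
## [1] 1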
# Chi-squared test
chisq.test(x=c(2,6,10,10,7), p=c(probs, complement),
simulate.p.value=TRUE)
##
## Chi-squared test for given probabilities with simulated p-value (based
## on 2000 replicates)
##
## data: c(2, 6, 10, 10, 7)
## X-squared = 1.9162, df = NA, p-value = 0.7596
drug <- matrix(c(75,65,25, 35), nrow = 2)
chisq.test(drug , correct = FALSE)
##
## Pearson's Chi-squared test
##
## data: drug
## X-squared = 2.381, df = 1, p-value = 0.1228
chisq.test(drug , correct = TRUE)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: drug
## X-squared = 1.9286, df = 1, p-value = 0.1649
# Enter the data
d0 <- data.frame(y1 = c(4,5.2,5.7,4.2,4.8,3.9,4.1,3,4.6,6.8),
y2 = c(4.4,4.2,5.2,2.7,4.7,4.8,4,2.4,3.6,5.3))
var(d0)
## y1 y2
## y1 1.1401111 0.7423333
## y2 0.7423333 0.9667778
# Test the normality of each sample
shapiro.test(d0$y1)
##
## Shapiro-Wilk normality test
##
## data: d0$y1
## W = 0.95567, p-value = 0.7356
shapiro.test(d0$y2)
##
## Shapiro-Wilk normality test
##
## data: d0$y2
## W = 0.92527, p-value = 0.403
library(reshape2)
d <- melt(d0)
var.test(value ~ variable, data = d)
##
## F test to compare two variances
##
## data: value by variable
## F = 1.1793, num df = 9, denom df = 9, p-value = 0.81
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2929189 4.7478136
## sample estimates:
## ratio of variances
## 1.17929
bartlett.test(value ~ variable, data = d)
##
## Bartlett test of homogeneity of variances
##
## data: value by variable
## Bartlett's K-squared = 0.057905, df = 1, p-value = 0.8098
t.test(d0$y1, d0$y2, var.equal=TRUE)
##
## Two Sample t-test
##
## data: d0$y1 and d0$y2
## t = 1.0893, df = 18, p-value = 0.2904
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4643413 1.4643413
## sample estimates:
## mean of x mean of y
## 4.63 4.13
library(plyr)
summary <- ddply(d, "variable", summarize,
mean=mean(value), d.sd=sd(value),
Lower=mean - 1.96*d.sd/sqrt(NROW(d)),
Upper=mean + 1.96*d.sd/sqrt(NROW(d)))
summary
## variable mean d.sd Lower Upper
## 1 y1 4.63 1.0677599 4.162034 5.097966
## 2 y2 4.13 0.9832486 3.699072 4.560928
library(ggplot2)
ggplot(summary, aes(mean, y = variable)) + geom_point() +
geom_errorbarh(aes(xmin=Lower, xmax=Upper), height = 0.2)
# m1 and m2 are the sample means.
# s1 and s2 are the sample standard deviations.
# n1 and n2 are the sample sizes.
# The null hypothesis is equality of the means.
# When equal variances are not assumed:
t.test2 <- function(m1,m2,s1,s2,n1,n2,m0=0,
equal.variance=FALSE)
{
if( equal.variance==FALSE )
{
se <- sqrt( (s1^2/n1) + (s2^2/n2) )
# degrees of freedom via the Satterthwaite approximation
df <- ( (s1^2/n1 + s2^2/n2)^2 )/( (s1^2/n1)^2/(n1-1)
+ (s2^2/n2)^2/(n2-1))
} else
{
# pooled standard error of the difference between the two means
se <- sqrt( (1/n1 + 1/n2) * ((n1-1)*s1^2 + (n2-1)*s2^2)/(n1+n2-2))
df <- n1+n2-2
}
t <- (m1-m2-m0)/se
dat <- c(m1-m2, se, t, 2*pt(-abs(t),df))
names(dat) <- c("Difference of means", "Std Error", "t", "p-value")
return(dat)
}
# Note that the output agrees with running t.test on x1 and x2.
tt2 <- t.test2(4000, 3700, sqrt(38000), sqrt(41200),
11, 11, equal.variance=TRUE)
round(tt2,4)
## Difference of means Std Error t p-value
## 300.0000 84.8528 3.5355 0.0021
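# Checking the p-value by hand (added sketch): two-sided tail area of t
# with n1 + n2 - 2 = 20 degrees of freedom, ~0.0021 as above
2 * pt(-3.5355, df = 20)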
d <- data.frame(group = rep(c(1,2), each = 10),
y = c(3,7,25,10,15,6,12,25,15,7,48,44,40,38,33,21,20,12,1,18))
shapiro.test(d[d$group == 1 ,]$y)
##
## Shapiro-Wilk normality test
##
## data: d[d$group == 1, ]$y
## W = 0.89361, p-value = 0.1861
library(ggplot2)
ggplot(d, aes(sample = y, colour = factor(group ))) +
stat_qq() +
stat_qq_line()
var.test(y ~ group , data = d)
##
## F test to compare two variances
##
## data: y by group
## F = 0.24735, num df = 9, denom df = 9, p-value = 0.04936
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.06143758 0.99581888
## sample estimates:
## ratio of variances
## 0.2473473
# or bartlett.test(y ~ group , data = d)
t.test(y ~ group , data=d, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: y by group
## t = -2.7669, df = 13.196, p-value = 0.01583
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -26.694067 -3.305933
## sample estimates:
## mean in group 1 mean in group 2
## 12.5 27.5
g <- data.frame(group = rep(c("a","b"), each = 10),
y = c(3,7,25,10,15,6,12,25,15,7,48,44,40,38,33,21,20,12,1,18))
g$group <- as.factor(g$group)
ggplot(g, aes(x = group , y = y)) +
geom_boxplot(aes(fill = group )) +
geom_jitter(color="black",
position=position_jitter(0.2)) +
theme(legend.position = c(0.1, 0.85),
legend.text = element_text(size=10, face="bold"),
legend.background = element_rect(fill="transparent"))
a <- c(4.0,5.2,5.7,4.3,4.8,3.9,4.1,3.0,4.6,6.8)
b <- c(4.4,4.2,5.2,2.7,4.7,4.8,4.0,2.4,3.6,5.3)
t.test(a, b, paired=TRUE)
##
## Paired t-test
##
## data: a and b
## t = 2.0074, df = 9, p-value = 0.07564
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.06471457 1.08471457
## sample estimates:
## mean difference
## 0.51
prop.test(x = c(546, 475), n = c(1000, 1000))
##
## 2-sample test for equality of proportions with continuity correction
##
## data: c(546, 475) out of c(1000, 1000)
## X-squared = 9.8043, df = 1, p-value = 0.001741
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.02629417 0.11570583
## sample estimates:
## prop 1 prop 2
## 0.546 0.475
# From the integers 1:10, draw 6 random numbers
sample(x = 1:10, size = 6)
## [1] 10 5 6 9 1 8
d <- read.table(text ="
L8 L16 H8 H16
7 12 14 19
8 17 18 25
15 13 19 22
11 18 17 23
9 19 16 18
10 15 18 20", header = T)
library(reshape2)
d <- melt(d)
colnames(d) <- c('treat', 'y')
d$treat <- factor(d$treat)
anovaCRD <- aov(y ~ treat, data = d)
summary(anovaCRD)
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 3 382.8 127.60 19.61 3.59e-06 ***
## Residuals 20 130.2 6.51
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(dplyr)
# use dplyr::summarise explicitly in case plyr is also loaded
d2 <- dplyr::summarise(group_by(d, treat), mean = mean(y),
sd = sd(y))
d2 <- data.frame(d2, temp = c("L", "L", "H", "H"),
daylength = c("8h", "16h", "8h", "16h"))
d2$temp <- as.factor(d2$temp)
d2
##   treat     mean       sd temp daylength
## 1    L8 10.00000 2.828427    L        8h
## 2   L16 15.66667 2.804758    L       16h
## 3    H8 17.00000 1.788854    H        8h
## 4   H16 21.16667 2.639444    H       16h
d2$daylength <- factor(d2$daylength , levels
= c("8h", "16h"))
# The colourpicker package can be installed to pick custom colours as a vector
library(ggplot2)
ggplot(d2, aes(daylength , mean , group =temp, fill=temp)) +
geom_col(color= "black", position = "dodge") +
scale_fill_manual(values = c("#BFEFFF", "#EED5D2")) +
theme_bw() +
geom_errorbar(aes(daylength , ymin = mean -sd, ymax=mean+sd), width=0.1, position
= position_dodge(width = 0.9)) +
theme(axis.text.x = element_text(size=12, color = "black", angle = 45,
hjust = 1), axis.text.y = element_text(size=12,
color = "black", angle = 0, hjust = 1),
axis.title = element_text(size=12,face="italic")) +
ggtitle("mean of treatments") +
xlab("daylength") + ylab("growth (cm)") +
theme(legend.position = c(0.87, 0.85), legend.text =
element_text(colour="black", size=10, face="bold"),
legend.background = element_rect(fill="transparent"))
d0 <- read.table(text ="
L8 L16 H8 H16
7 12 14 19
8 17 18 25
15 13 19 22
11 18 17 23
9 19 16 18
10 15 18 20", header = T)
library(reshape2)
d <- melt(d0)
colnames(d) <- c('treat', 'y')
par(mfrow=c(1,2))
pairwise.t.test(d$y, d$treat , p.adjust.method = "none")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: d$y and d$treat
##
## L8 L16 H8
## L16 0.00101 - -
## H8 0.00012 0.37611 -
## H16 2.6e-07 0.00131 0.01037
##
## P value adjustment method: none
plot.design(d)
plot(TukeyHSD(anovaCRD , conf.level = 0.95), las = 2)
d0 <- read.table(text = "
A B C D
4 8 25 33
5 11 28 21
2 9 20 48
5 12 15 18
1 7 30 31", header = T)
library(reshape2)
d <- melt(d0)
colnames(d) <- c('treat', 'y')
# Analysis of variance with the aov function
anovaCRD <- aov(y ~ treat , data = d)
summary(anovaCRD)
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 3 2300.2 766.7 16.61 3.59e-05 ***
## Residuals 16 738.4 46.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Analysis of variance with the lm function
anovaCRDlm <- lm(y ~ treat , data = d)
anova(anovaCRDlm)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 3 2300.2 766.72 16.614 3.595e-05 ***
## Residuals 16 738.4 46.15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(anovaCRDlm)
anovaCRDlm <- lm(log10(y) ~ treat , data = d)
anova(anovaCRDlm)
## Analysis of Variance Table
##
## Response: log10(y)
## Df Sum Sq Mean Sq F value Pr(>F)
## treat 3 3.06895 1.02298 28.096 1.298e-06 ***
## Residuals 16 0.58257 0.03641
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(anovaCRDlm)
library("car")
d0 <- read.table(text ="
H16 H8 L16 L8
209 14 62 2
411 38 37 1
187 49 13 4
50 77 28 1
130 16 49 2
263 88 35 1", header = T)
library(reshape2)
d <- melt(d0)
dim(d)
d <- data.frame(d, r = rep(1:6,4))
colnames(d) <- c('t', 'y', 'r')
d$t <- factor(d$t)
# Analysis of variance
b1 <- aov(y ~ t, data = d)
par(bg = NA)
par(mfrow = c(2,2))
par(mar = c(2, 4.5, 3, 1))
plot(b1)
summary(b1)
shapiro.test(b1$residuals)
# Create the data
d <- data.frame(x = c(1,2,3,4,5), y = c(1,1,2,2,4))
# Compute the correlation coefficient and fit a simple linear regression of y on x
cor(d$x,d$y)
model <- lm(y ~ x, data = d)
# Scatter plot with the fitted regression line
plot(d$x, d$y, xlab= 'x', pch = 21, bg = 'red')
abline(model)
# Using the simplified lm wrapper to draw the confidence and prediction intervals
library(UsingR)
simple.lm(d$x, d$y, show.ci=TRUE, conf.level=0.90)
# The simple.lm function in the UsingR package fits a simple linear regression and draws the confidence and prediction intervals at the chosen level.
# Run the regression analysis and view the output
summary(model)
# View the analysis of variance table
anova(model)
# Predict y for given x values, with confidence intervals
predict(model, data.frame(x = c(3.5, 4.5)), se.fit = TRUE, interval = "confidence", level = 0.95)
# Predict y for given x values, with prediction intervals
predict(model, data.frame(x = c(3.5, 4.5)), se.fit = TRUE, interval = "prediction", level = 0.95)
# Draw the diagnostic plots
par(mfrow=c(2,2))
plot(model)
d <- data.frame(x = c(1,2,3,4,5,6,7,8),
y = c(2,3,5,8,12,17,24,38))
cor(d$x,d$y)
model <- lm(y ~ x, data = d)
summary(model)
# Draw the diagnostic plots
par(mfrow=c(2,2))
plot(model)
par(mfrow=c(1,1))
# Scatter plot of the data
plot(d$x, d$y, xlab= 'x', pch = 21, bg = 'red')
abline(model)
# The normality of the errors can be checked with the commands below:
# extract the residuals from the lm object and test their normality
model$residuals
shapiro.test(model$residuals)
# The model summary and scatter plot after log-transforming the dependent variable are shown below. The log transformation raises the coefficient of determination from 0.85 to 0.996. For further comparison, the diagnostic plots can be drawn with plot(model) and compared with the untransformed fit.
model <- lm(log(y) ~ x, data = d)
plot(d$x, log(d$y), xlab= 'x', pch = 21, bg = 'red')
abline(model)
# An optimal transformation of the data using the Box-Cox transformation; it raises the coefficient of determination beyond the log transform, to 0.998.
# Compute the optimal lambda for the Box-Cox transformation
# library(MASS)
# bc <- boxcox(d$y ~ d$x)
# (lambda <- bc$x[which.max(bc$y)])
# Fit the new regression line after the Box-Cox transformation
# new_model <- lm(((d$y^lambda-1)/lambda) ~ d$x)
# par(mfrow=c(2,2))
# plot(new_model)
# summary(new_model)
d <- data.frame(x = c(1,2,3,4,5), y = c(1,1,2,2,4))
model1 <- lm(y ~ -1 + x, data = d)
model2 <- lm(y ~ x, data = d)
# A non-significant difference between the two models indicates that the intercept does not differ significantly from zero. A second approach uses the AIC() and BIC() functions as below; the smaller AIC of the no-intercept model indicates that it is preferable to the model with an intercept.
anova(model1, model2)
AIC(model1, model2)
BIC(model1, model2)
data <- read.table(text ="
water nutr yield
11 50 2841
8 21 1876
10 38 2934
10 18 1552
12 43 3065
12 65 3670
5 50 2005
8 48 3215
8 17 1930
6 70 2010
9 20 3111
9 29 2882
5 15 1683
7 14 1817
13 60 4066", header = T)
model = lm(yield ~ water + nutr , data = data)
summary(model)
d <- data.frame(
temp = c(29.2, 28.1, 27.2, 26.4, 26.3, 26.2, 26.2,
26, 25.9, 25.7, 24.5, 24.4, 24, 23.9, 23.9, 23.7,
23.7,23.7, 23.6, 23.5, 34.2, 23.4, 23.2, 23.1, 23.1,
23, 22.9, 22.5, 22.5, 22.4, 21.7, 21.2, 20, 19.2, 19,
19, 18.8, 18, 18, 17.4),
yield = c(2.3, 3.1, 2.8, 2.4, 3.6, 2.3, 3.8, 3.1,
2.4, 2.1, 3.5, 3.8, 3.1, 2.9, 3.2, 3, 4.5, 3.5, 3.3,
3.7, 3, 4.4, 3.1, 2.6, 4.8, 3.2, 3.3, 3.1,3.4, 3.2,
4.2, 4.5, 4.7, 5, 6.2, 6, 6.1, 7.3, 6.6, 6.2))
model1 = lm(yield ~ temp + I(temp^2), data = d)
summary(model1)
library("ggplot2")
ggplot(d, aes(temp, yield)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE) # add the fitted regression curve
model2 = lm(yield ~ temp, data = d)
summary(model2)
# Method 1
a <- c(12, 14, 25, 31, 11, 16, 52, 19, 9, 25, 15, 21, 18, 12, 15, 40)
binom.test(11, 14)
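# Where 11 and 14 come from (added sketch): the sign test drops ties with
# the hypothesized median 25 and counts the values below it
sum(a < 25)  # 11
sum(a != 25) # 14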
# Method 2
library(BSDA)
SIGN.test(a, y = NULL, md = 25, alternative = "two.sided", conf.level = 0.95)
# One-sample Wilcoxon test
a <- c(2.7, 2.5, 2, 1.7, 2.3, 1.9, 2.4, 2.4, 2.2, 2.6)
wilcox.test(a, mu = 2, alternative = "two.sided", conf.int = F)
# Tomato plant scores: treatment A
a <-c(11,12,5,7,5,5,8,9,16,8)
# Tomato plant scores: treatment B
b <-c(9,16,9,6,14,16,8,10,14,18)
# Create a data frame
my_data <- data.frame(group = rep(c("a", "b"), each = 10), weight = c(a, b))
wilcox.test(weight ~ group, data = my_data, paired = TRUE)
# Wilcoxon rank test on two samples
a <- c(60,62,68,50,78,66)
b <- c(65,91,75,82,83,78,90)
# Mann-Whitney U test on two independent samples
wilcox.test(a,b, conf.int = T)
# Kruskal-Wallis
t20 <- c(22.2, 17.3, 21.2, 25.2, 16.4)
t25 <- c(24.1, 30.3, 27.4, 26.4, 34.8)
t30 <- c(25.9, 18.4, 23.2, 21.9, 22.6)
t35 <- c(23.9, 24.8, 28.2, 26.4, 21.7)
kruskal.test(list(t20,t25,t30,t35))
# Alternatively:
x <- c(t20,t25,t30,t35)
g <- factor(rep(1:4, c(5, 5, 5, 5)))
kruskal.test(x, g)
# Friedman test for analyzing a randomized complete block design
t1 <- c(7.4, 6.5, 5.6)
t2 <- c(9.8, 6.8, 6.2)
t3 <- c(7.3, 6.1, 6.4)
t4 <- c(9.5, 8.0, 7.4)
y <- c(t1,t2,t3,t4)
t <- factor(rep(1:4, each = 3))
b <- factor(rep(1:3, 4))
c <- data.frame(y,t,b)
c
friedman.test(y~t|b)
a <- c(4, 1, 9,5, 2, 10, 7, 3, 6, 8)
b <- c(5, 2, 10, 6, 1, 9, 7, 3, 4, 8)
cor.test(a, b, method = 'spearman')
z <- matrix(c(3,7,2,1,4,-7,6,5,1),nrow = 3)
det(z)
solve(z)
a <- matrix(c(3,2,1,-1,1,2,1,0,-1),3)
c <- matrix(c(2,1,3),3)
x <- solve(a,c)
x
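# Verification (added): multiplying back reproduces the right-hand side c
a %*% x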
# Sample vector
vector <- c(1, 5, 3,4)
# Calculate the length of the vector
vector_length <- sqrt(sum(vector^2))
# dividing each element by the vector length
normalized_vector <- vector / vector_length
# Print the normalized vector
print(normalized_vector)
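# Check (added): the normalized vector has length 1
sqrt(sum(normalized_vector^2))
## [1] 1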
library(ggplot2)
df <- data.frame(x=c(2, 1), y=c(-1, 2))
ggplot(df , aes(x,y))+
geom_abline(aes(slope=0,intercept=0)) +
geom_vline(xintercept = 0) +
geom_point(aes(x=0,y=0)) +
geom_segment(aes(x=0,y=0,xend=2,yend=1), arrow=arrow(length = unit(0.5,"cm"),
angle=20),lineend = "butt") +
geom_segment(aes(x=0,y=0,xend=-1,yend=2), arrow=arrow(length = unit(0.5,"cm"),angle=20),
lineend = "butt",linejoin = "round") +
theme_minimal ()
set.seed(3)
a <- sample(1:10, size = 100, replace = TRUE ,
prob = 10:1)
hist(a, main=NULL)
b <- rbeta(1000,5,2)
c <- rbeta(1000,2,5)
d <- rbeta(1000,2,7)
hist(c, main=NULL)
hist(d, freq=FALSE , main=NULL)
lines(density(d), col='red', lwd=3)
abline(v = c(mean(d),median(d)),
col=c("green","red"),lty=c(2,2), lwd=c(3, 3))
hist(sqrt(d), main=NULL)
plot(c, d)
plot(sqrt(c), sqrt(d))
library(ggplot2)
library(GGally)
x <- data.frame (a = sample(1:10, size = 100,
replace = TRUE , prob = 10:1),
b = rbeta(100,5,2),
c = rbeta(100,2,5),
d = rbeta(100,2,7))
ggpairs(x)
library(ggplot2)
pois <- data.frame(Lambda.1=rpois(10000, 1),
Lambda.2=rpois(10000, 2),
Lambda.5=rpois(10000, 5), Lambda.10=rpois(10000, 10),
Lambda.20=rpois(10000, 20))
library(reshape2)
pois <- melt(data=pois, variable.name="Lambda",
value.name="x")
ggplot(pois , aes(x=x)) +
geom_density(aes(group=Lambda , color=Lambda ,
fill=Lambda),
adjust=4, alpha=1/2) +
theme(legend.position = c(0.87, 0.7))
pdf(file = "chisquared.pdf")
x <- (0 : 120) / 10
plot (x, dchisq(x, 4), type = "l")
lines(x, dchisq(x, 5))
lines(x, dchisq(x, 6))
text( c(3, 4.2, 5.2), c(.18, .153, .137),
labels = c("4 df", "5 df", "6 df"))
dev.off()
plot(iris[1:4], pch = 21, bg=c('red','blue','green',
'tomato')[unclass(iris$Species)])
d <- data.frame(x = c(1,2,3,4,5), y = c(4,7,9,15,14), z = c(2.5,1,3,2,3 ))
cov(d)
cor(d)
library(mvtnorm)
par(bg = NA)
par(mfrow= c(1,3))
par(mar = c(4, 4, 1, 1))
# Generate two random variables with mean zero and correlation -0.8
d1 <- rmvnorm(500, mean = c(0,0), sigma = matrix(c(1, -.8, -.8, 1), 2,2))
# Scatter plot of the two simulated variables:
plot(d1, xlab="x1", ylab="x2", pch = 16, col = "red")
# Generate two random variables with mean zero and correlation 0.8
d2 <- rmvnorm(500, mean = c(0,0), sigma = matrix(c(1, .8, .8, 1), 2,2))
# Scatter plot of the two simulated variables:
plot(d2, xlab="x1", ylab="x2", pch = 16, col = "red")
# Generate two random variables with mean zero and correlation 0
d3 <- rmvnorm(500, mean = c(0,0), sigma = matrix(c(1, 0, 0, 1), 2,2))
# Scatter plot of the two simulated variables:
plot(d3, xlab="x1", ylab="x2", pch = 16, col = "red")
cov <- matrix(c(5, 2, 2, 2),2,2)
x <- c(2,6)
mahalanobis(x, c(4,8), cov)
d <- data.frame(f = factor(rep(c("a","b"), each=3)),
y1 = c(6,10,8,6,7,8),
y2 = c(9,6,3,5,3,1))
d
t(c(10, 6) - colMeans(d[,2:3])) %*% solve(var(d[,2:3])) %*% (c(10, 6) - colMeans(d[,2:3]))
d <- data.frame(f = factor(rep(c("a","b"), each=3)),
y1 = c(6,10,8,6,7,8),
y2 = c(9,6,3,5,3,1))
# Calculate Mahalanobis distance
d$mahalanobis <- mahalanobis(d[,2:3], colMeans(d[,2:3]), cov(d[,2:3]))
# Calculate p-values from the Mahalanobis distances (df = number of variables)
d$pvalue <- pchisq(d$mahalanobis, df = 2, lower.tail=FALSE)
# Display the data frame
d
library(psych)
mardia(d[,2:3], plot=F)
describe(d[,2:3])
library(mvnormtest)
mshapiro.test(t(d[,2:3]))
outlier(d[,2:3], plot = TRUE, bad = 1,na.rm = TRUE)
a <- data.frame(y1 = c(0.803, 1.037, -3.255, 2.82, 0.667, -2.072),
y2 = c(-1.29, -0.172, -0.446, 2.641, 0.291, -1.024))
# Compute the sample variance-covariance matrix
s <- var(a)
round(s,2)
b <- eigen(s)
b
b <- data.frame(a=c(1,2,3,4,5,4,3),b=c(4,5,4,3,2,3,2),
c=c(8,7,6,7,9,1,2))
A <- cov(b)
x <- eigen(A)
round(crossprod(x$vectors),3)
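# Two classical checks (added sketch): the eigenvalues sum to the trace of A
# and their product equals det(A)
sum(x$values); sum(diag(A))
prod(x$values); det(A)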
# Hotelling's T2 test using the manova function
df <- data.frame(f = c("a","a","a","b","b","b"),
y1 = c(6,10,8,6,7,8),
y2 = c(9,6,3,5,3,1))
m <- manova(cbind(y1,y2) ~ f, df)
summary(m,test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"))
summary.aov(m)
# Hotelling's T2 test for comparing the samples against a specific mean vector
library(ICSNP)
muH0 <- c(0, 0)
HotellingsT2(cbind(df$y1, df$y2) ~ df$f, mu = muH0)
muH0 <- c(-1, 2)
HotellingsT2(cbind(df$y1, df$y2) ~ df$f, mu = muH0)
library(ggplot2)
library(reshape2)
a <- data.frame(g=c(1,1,1,2,2,3,3,3),
y1=c(9,6,9,0,2,3,1,2),
y2=c(3,2,7,4,0,8,9,7))
a$g <- as.factor(a$g)
am <- melt(a, id = 'g')
am$variable <- factor(am$variable)
ggplot(am, aes(x=g, y=value, fill=variable)) +
geom_boxplot(position = "dodge2")
m <- manova(cbind(y1,y2) ~g, a)
summary(m,test = "Wilks")
summary(m,test = "Pillai")
summary(m,test = "Hotelling-Lawley")
summary(m,test = "Roy")
summary.aov(m)
# Pairwise comparison of the treatments
m1 <- manova(cbind(y1,y2) ~g, a, subset = g %in% c(1, 2))
summary(m1,test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"))
m2 <- manova(cbind(y1,y2) ~g, a, subset = g %in% c(1, 3))
summary(m2,test = c("Pillai", "Wilks", "Hotelling-Lawley", "Roy"))
# Calculating SSCP matrices
library(HoRM)
m <- manova(cbind(y1,y2) ~g, a)
SSCP.fn(fits = m)
d <- read.table(text = "
group var1 var2 var3
a 10.59 10.48 4.73
a 11.05 9.02 5.33
a 9.94 10.77 3.46
a 7.99 15.56 5.15
a 9.08 14.76 5.46
b 9.38 14.41 1.46
b 9.52 12.51 3.00
b 10.81 11.12 5.19
b 10.68 9.76 2.25
b 10.95 11.61 3.97", header = TRUE)
ds <- scale(d[-1], center = T, scale = T)
cov <- round(cov(ds),2)
cov
eigen(cov)
pca <- prcomp(d[-1], center = TRUE , scale. = TRUE)
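# As a check (added sketch): the squared sdev values from prcomp are the
# eigenvalues of the correlation matrix, matching eigen(cov) above up to
# the two-decimal rounding applied to cov
round(pca$sdev^2, 2)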
library(ggplot2)
library(ggbiplot )
library(ggrepel)
pca$label <- rownames(d)
ggbiplot(pca, obs.scale = 1, var.scale = 1, alpha = 0,
groups = d$group) +
theme(legend.direction = 'vertical', legend.position = 'right') +
geom_point(size = 3, shape = 21, aes(fill = groups, color = groups)) +
geom_text_repel(label=pca$label , colour = "red", size = 4, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) +
xlim(-3,3) + ylim(-2.1,2) +
scale_color_manual(values=c("black", "black")) +
scale_fill_manual(values = c("red", "green")) +
theme_bw() +
theme(legend.direction = 'vertical', legend.position = c(0.9, 0.85))
# Using factoextra package
require(factoextra )
library(dplyr )
library(Rcpp)
df <- read.table(text = "
group var1 var2 var3
a 10.59 10.48 4.73
a 11.05 9.02 5.33
a 9.94 10.77 3.46
a 7.99 15.56 5.15
a 9.08 14.76 5.46
b 9.38 14.41 1.46
b 9.52 12.51 3.00
b 10.81 11.12 5.19
b 10.68 9.76 2.25
b 10.95 11.61 3.97", header = TRUE)
res.pca <- prcomp(df[,2:4], scale = TRUE)
fviz_eig(res.pca)
fviz_pca_ind(res.pca, col.ind = "cos2", # Color by the quality of representation
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE) # Avoid text overlapping
fviz_pca_var(res.pca ,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE) # Avoid text overlapping
fviz_pca_biplot(res.pca , repel = TRUE ,
col.var = "#2E9FDF", # Variables color
col.ind = "#696969") + # Individuals color
theme_bw()
# Heatmap
library(pheatmap)
library(ggplot2)
library(ggplotify)
library(patchwork)
d <- read.table(text = "
group var1 var2 var3
a 10.59 10.48 4.73
a 11.05 9.02 5.33
a 9.94 10.77 3.46
a 7.99 15.56 5.15
a 9.08 14.76 5.46
b 9.38 14.41 1.46
b 9.52 12.51 3.00
b 10.81 11.12 5.19
b 10.68 9.76 2.25
b 10.95 11.61 3.97", header = TRUE)
df <- d[,-1]
annotation <- data.frame(group = d[,1])
p1 <- pheatmap(df, annotation_row = annotation,
cluster_rows = T, cluster_cols = T,
border_color = 'white')
p2 <- pheatmap(cor(df), cluster_rows = T,
cluster_cols = T, border_color = 'white')
as.ggplot(p1) + as.ggplot(p2)
b <- data.frame(a=(1:10),
y1=c(2.5,0.5,2.2,1.9,3.1,2.3,2,1,1.5,1.1),
y2=c(2.4,0.7,2.9,2.2,3,2.7,1.6,1.1,1.6,0.9), row.names = 1)
dist <- dist(b, method = "euclidean")
# the ape package can be used for PCoA
library(ape)
PCOA <- pcoa(dist)
# plot the eigenvalues
barplot(PCOA$values$Relative_eig[1:10])
biplot.pcoa(PCOA)
biplot.pcoa(PCOA , b)
m <- matrix(ncol=5,nrow=5)
m[lower.tri(m)] <- c(9,3,6,11,7,5,10,9,2,8)
diag(m) <- 0
d <- as.dist(m, diag = TRUE)
cl <- hclust(d, method = "average")
plot(as.dendrogram(cl))
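# Cluster memberships (added sketch) can be extracted from the tree with
# cutree, e.g. cutting into two groups:
cutree(cl, k = 2)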
a <- read.table(text = "
10 5
20 20
30 10
30 15
5 10")
d <- dist(a, method = "euclidean")
d
library(factoextra)
dend1 <- hcut(d, k=2, hc_method = "centroid")
dend2 <- hcut(d, k=2, hc_method = "ward.D2")
par(mfrow = c(1,2))
plot(as.dendrogram(dend1))
plot(as.dendrogram(dend2))
d <- read.table(text = "
place TN NN PL FLW FLL SPS SL
HawramaP1 10.4 3 28.5 10.1 20.50 11.6 7.2
IG12638P2 16.2 3 29.1 15.3 29.90 23.9 9.9
IG12768P2 15.4 4 25.8 11.9 25.20 25.3 9.6
IG12769P1 24.9 4 16.0 14.6 26.30 22.6 9.4
IG88725P1 17.9 4 26.0 11.3 21.70 19.4 7.3
IG88732P1 16.7 4 15.3 17.1 27.50 26.0 7.4
IG88751P1 16.7 4 19.3 12.9 26.20 21.4 8.2
IG88753P2 9.8 4 25.3 13.0 25.50 23.3 8.7
IG88882P2 20.1 3 40.2 16.7 22.20 21.0 9.6
KalakanP3 8.5 3 23.6 6.9 20.00 19.3 7.1
MamokhP3 20.0 4 25.6 8.8 20.30 18.3 7.2
SeydsalP3 12.4 3 36.0 9.0 16.50 10.3 6.1
TarkhanP4 16.2 4 24.9 7.1 18.25 17.2 6.6
TazabadP4 13.4 3 26.8 13.7 22.35 20.0 11.9
TazabadP4 14.7 4 24.8 7.0 17.80 20.5 7.3
TirgaraP4 18.4 4 19.2 6.3 19.70 19.4 6.9
ArandanP4 12.2 4 23.2 5.6 17.30 17.1 6.4
BainjubP4 12.7 3 23.2 6.7 23.30 19.5 7.5
ChatanP4 12.4 4 23.1 11.4 21.70 17.8 7.7
HanaglaP4 9.8 4 19.4 7.1 23.40 16.7 6.4
Ac49656P4 14.6 3 40.9 12.2 22.10 29.6 13.3
aI49657P4 20.7 5 15.0 11.7 24.90 24.9 6.6
aI49659P4 18.1 4 18.7 15.8 25.30 24.4 8.4
aI49660P4 14.0 5 23.8 18.6 28.10 27.8 9.1
aI49661P4 19.4 3 21.9 9.8 18.60 15.2 6.8
aI49662P4 24.8 4 31.6 13.4 21.20 29.5 13.6
aI49663P4 18.3 3 37.4 13.0 20.90 27.7 11.9
aI49664P4 9.9 4 24.2 19.6 24.70 28.9 11.3
aI49665P4 12.4 3 27.6 12.9 22.40 25.1 11.1
aI49666P5 8.7 3 32.1 13.0 20.00 26.8 11.7
IG19200P5 8.7 3 18.9 9.4 21.90 15.0 6.7", header = T)
d
library(factoextra)
dend <- hcut(d[,-1], k = 4, hc_metric = "euclidean",
hc_method = "complete")
library(dendextend)
dend <- set(as.dendrogram(dend), "labels_cex", 0.8)
cols_branches <- c("red","green","blue","brown","black")
dend <- color_branches(dend , k=5, col = cols_branches)
plot(dend)
ann <- data.frame(place =(d[,1]))
rownames(ann) <- rownames(d)
labels_colors(dend) <- as.numeric(
as.factor(ann[,1])[ order.dendrogram(dend )])
labels(dend) <- as.character(
rownames(ann ))[ order.dendrogram(dend)]
dend %>% color_branches(k=5,
groupLabels = T) %>% plot(horiz = F)
library(circlize)
circlize_dendrogram(dend , dend_track_height = 0.2,
groupLabels = T)
d0 <- read.table(text = "
place TN NN PL FLW FLL SPS SL
HawramaP1 10.4 3 28.5 10.1 20.50 11.6 7.2
IG12638P2 16.2 3 29.1 15.3 29.90 23.9 9.9
IG12768P2 15.4 4 25.8 11.9 25.20 25.3 9.6
IG12769P1 24.9 4 16.0 14.6 26.30 22.6 9.4
IG88725P1 17.9 4 26.0 11.3 21.70 19.4 7.3
IG88732P1 16.7 4 15.3 17.1 27.50 26.0 7.4
IG88751P1 16.7 4 19.3 12.9 26.20 21.4 8.2
IG88753P2 9.8 4 25.3 13.0 25.50 23.3 8.7
IG88882P2 20.1 3 40.2 16.7 22.20 21.0 9.6
KalakanP3 8.5 3 23.6 6.9 20.00 19.3 7.1
MamokhP3 20.0 4 25.6 8.8 20.30 18.3 7.2
SeydsalP3 12.4 3 36.0 9.0 16.50 10.3 6.1
TarkhanP4 16.2 4 24.9 7.1 18.25 17.2 6.6
TazabadP4 13.4 3 26.8 13.7 22.35 20.0 11.9
Tazaba2P4 14.7 4 24.8 7.0 17.80 20.5 7.3
TirgaraP4 18.4 4 19.2 6.3 19.70 19.4 6.9
ArandanP4 12.2 4 23.2 5.6 17.30 17.1 6.4
BainjubP4 12.7 3 23.2 6.7 23.30 19.5 7.5
ChatanP4 12.4 4 23.1 11.4 21.70 17.8 7.7
HanaglaP4 9.8 4 19.4 7.1 23.40 16.7 6.4
Ac49656P4 14.6 3 40.9 12.2 22.10 29.6 13.3
aI49657P4 20.7 5 15.0 11.7 24.90 24.9 6.6
aI49659P4 18.1 4 18.7 15.8 25.30 24.4 8.4
aI49660P4 14.0 5 23.8 18.6 28.10 27.8 9.1
aI49661P4 19.4 3 21.9 9.8 18.60 15.2 6.8
aI49662P4 24.8 4 31.6 13.4 21.20 29.5 13.6
aI49663P4 18.3 3 37.4 13.0 20.90 27.7 11.9
aI49664P4 9.9 4 24.2 19.6 24.70 28.9 11.3
aI49665P4 12.4 3 27.6 12.9 22.40 25.1 11.1
aI49666P5 8.7 3 32.1 13.0 20.00 26.8 11.7
IG19200P5 8.7 3 18.9 9.4 21.90 15.0 6.7", header = T)
d <- d0[-1]
k1 <- kmeans(d, centers = 3)
k2 <- kmeans(d, centers = 4)
plot(d[ ,c(3, 4)], xlab = "Peduncle length",
ylab = "Flag leaf width", pch = 16, col =k2$cluster, cex = 2)
text(d[ ,3], d[ ,4], labels = d0$place, pos = 3)
library(ggplot2)
ggplot(d0, aes(PL, FLW, color = factor(k1$cluster), label = place)) +
geom_point () +
geom_text()
library(factoextra)
rownames(d) <- d0$place
p1 <- fviz_cluster(k1, data = d,repel = T,ellipse = T) + ggtitle("k = 3")
p2 <- fviz_cluster(k2, data = d,repel = T,ellipse = T) + ggtitle("k = 4")
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)
fviz_cluster(k1, data = d, repel = T, ellipse = T)
d <- read.table(text = "
7.151 8.322 12.006 2.195 1.162
7.453 10.166 12.200 2.272 1.734
8.618 10.592 13.577 4.592 2.552
5.976 9.425 10.802 2.213 0.906
6.841 10.324 12.366 2.915 1.995
5.961 11.171 11.049 3.813 3.651")
factor <- factanal(d, factors = 2, scores
= "regression",rotation = "varimax")
scores <- factor$scores
scores <- round(data.frame(scores), digits = 3)
scores
loadings <- subset(factor$loadings[1:5 ,])
loadings <- round(loadings , digits = 3)
loadings <- data.frame(loadings)
loadings
library(ggrepel)
library(ggplot2)
p1 <- ggplot(loadings , aes(Factor1, Factor2)) +
geom_point(size =3) +
geom_text_repel(label=rownames(loadings), colour
= "gray", size = 4, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) +
theme_bw()
p2 <- ggplot(scores , aes(Factor1, Factor2)) +
geom_point(size = 3, color="brown") +
geom_text_repel(label = rownames(scores), color
= "gray2", size = 4, box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) + theme_bw()
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)
train <- read.table(text = "
g x1 x2
1 -2 5
1 0 3
1 -1 1
2 0 6
2 2 4
2 1 2
3 1 -2
3 0 0
3 -1 -4", header = T)
library(MASS)
model <- lda(g ~ ., train, prior = c(1,1,1)/3)
model
predict <- predict(model, train)
predict
# Graph
attributes(model)
ldahist(data = predict$x[,1], g = train$g)
plot(model)
# Dotplot
test <- data.frame(g = 2, x1 = 1, x2 = 3)
predict2 <- predict(model , test)
names(predict2)
predict2$class
table(predict2$class , test$g)
cat("LDA accuracy is:",
sum(predict2$class == test$g)/ length(test$g))
# Scatter plot of the observations on the first and second discriminant functions:
library(ggplot2)
library(ggrepel)
lda.data <- cbind(train , predict(model )$x)
lda.data$g <- as.factor(lda.data$g)
ggplot(lda.data , aes(LD1, LD2)) +
geom_point(aes(color = g), size = 3) +
geom_text_repel(label=rownames(lda.data),
colour = "black", size = 4, box.padding
= unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) + theme_bw()
a <- read.table(text = "
rep cr PWWl DWWl GPl GSl GHl PWWf DWWf GPf GSf GHf
1 So 27.71 2.79 87 0.02 65.05 8.92 1.32 75 0.006 144
2 So 26.88 2.50 77 0.02 43.92 4.84 1.28 50 0.005 43
3 So 27.50 2.66 56 0.03 51.20 8.14 1.38 79 0.028 151
4 So 23.15 2.71 91 0.02 54.79 10.53 1.57 63 0.009 130
1 Co 12.82 1.32 100 0.03 40.04 3.57 0.59 92 0.006 162
2 Co 12.17 1.22 85 0.04 39.46 4.16 0.83 80 0.016 146
3 Co 14.49 1.28 90 0.06 44.63 4.26 0.69 55 0.015 136
4 Co 13.18 1.18 95 0.03 51.55 4.59 0.72 72 0.020 151
1 Sa 6.22 0.43 91 0.02 43.85 7.89 0.99 99 0.007 133
2 Sa 5.50 0.40 77 0.02 47.49 5.55 0.80 66 0.041 153
3 Sa 5.55 0.40 47 0.03 43.80 4.68 0.53 55 0.014 132
4 Sa 5.86 0.49 86 0.02 51.44 8.25 0.91 71 0.049 127
1 Ba 4.37 0.47 90 0.02 46.67 9.11 1.47 78 0.007 153
2 Ba 4.69 0.47 89 0.02 43.09 6.46 1.06 79 0.023 147
3 Ba 4.71 0.46 54 0.03 36.00 4.57 0.75 82 0.022 131
4 Ba 4.10 0.41 95 0.02 40.32 2.61 0.49 74 0.034 143
1 Ra 0.80 0.03 84 0.03 52.00 2.63 0.38 82 0.006 154
2 Ra 0.93 0.04 85 0.02 53.17 1.81 0.28 51 0.005 33
3 Ra 0.79 0.04 50 0.02 58.53 2.46 0.33 88 0.008 135
4 Ra 0.74 0.04 86 0.02 45.24 1.77 0.24 77 0.009 144
1 Su 8.73 0.74 100 0.03 31.53 8.32 0.86 67 0.006 86
2 Su 9.13 0.82 89 0.03 32.95 12.43 1.29 54 0.005 26
3 Su 8.21 0.68 94 0.05 53.71 7.19 0.77 61 0.052 103
4 Su 8.41 0.77 96 0.03 39.10 7.81 0.75 72 0.023 118
1 Wh 4.03 0.43 100 0.03 21.82 5.05 0.93 75 0.005 153
2 Wh 3.81 0.41 97 0.03 65.13 3.27 0.71 42 0.005 58
3 Wh 3.47 0.33 97 0.03 41.83 5.19 0.90 61 0.044 156
4 Wh 4.39 0.48 97 0.02 24.50 3.62 0.71 79 0.014 173", header = T)
cancor(a[,3:7], a[,8:12])
d <- read.table(text = "
Gen Fl Chl Stem Height PodN PodL Yield HI Thaus SeedP
1 204 44 8.6 92.3 32.6 5.3 21.6 35.6 3.3 14.7
2 202 43 8.4 91.1 28.3 5.9 14.3 24.8 3.8 11.4
3 189 34 10.6 81.3 20.4 5.1 15.6 32.3 2.9 8.7
4 205 42 8.5 93.7 27.8 5.6 21.9 37.6 4.9 13.8
5 185 36 13.4 86.4 19.3 5.5 36.6 38.9 3.9 11.5
6 190 31 11.3 75.1 17.7 5.4 14.5 32.9 3.1 8.4
7 207 45 6.7 87.5 22.9 5.2 11.7 23.9 4.4 9.0
8 206 45 7.7 90.2 26.0 6.0 13.3 21.2 4.3 8.8
9 206 44 6.7 89.7 22.0 5.6 14.8 31.9 3.1 14.2
10 185 34 10.6 89.3 20.4 5.1 15.6 32.3 2.9 8.7
11 206 42 8.5 92.7 27.8 5.6 21.9 37.6 4.9 13.8", header = T)
d
factor <- factanal(d[-1], factors = 2, scores =
"regression", rotation = "varimax")
scores <- factor$scores
scores <- round(data.frame(scores), digits = 3); scores
loadings <- subset(factor$loadings[1:10 ,])
loadings <- round(loadings , digits = 3)
loadings <- data.frame(loadings)
loadings
library(ggrepel)
library(ggplot2)
p1 <- ggplot(loadings , aes(Factor1, Factor2)) +
geom_point(size =3, color="red") +
geom_text_repel(label=rownames(loadings),
colour = "blue", size = 4,
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) +
theme_bw()
p2 <- ggplot(scores , aes(Factor1, Factor2)) +
geom_point(size = 3, color="red") +
geom_text_repel(label = rownames(scores),
color = "blue", size = 4,
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines")) + theme_bw()
library(gridExtra)
grid.arrange(p1, p2, nrow = 1)
library(MASS)
train <- read.table(text = "
BCHR A B C D E F G H
a 12 23 4 53 2 12 10 5
a 8 23 10 90 3 22 27 12
a 16 24 8 95 1 27 17 8
a 11 22 3 36 1 5 52 3
a 17 23 14 86 2 39 24 8
a 9 26 14 45 1 22 25 9
a 15 22 7 78 2 17 11 5
a 9 20 9 97 3 12 13 6
b 18 19 12 80 3 20 11 6
b 10 23 10 121 2 23 22 10
b 16 21 7 143 2 26 16 8
b 17 22 9 75 2 13 10 6
b 20 21 17 199 1 50 26 15
b 16 22 14 99 2 42 25 12
b 23 21 7 164 1 24 11 7
b 30 20 11 140 2 25 14 9", header = T)
test <- read.table(text = "
BCHR A B C D E F G H
a 7 18 8 90 2 30 50 6", header = T)
#model construction and prediction
model.1 <- lda(BCHR ~ ., train, prior = c(1,1)/2)
#model.2 <- qda(BCHR ~ ., train, prior = c(1,1)/2)
model.1
predict <- predict(model.1, test)
names(predict)
predict$class
table(predict$class , test$BCHR)
#accuracy
cat("LDA accuracy is:",
sum(predict$class == test$BCHR)/ length(test$BCHR))
attributes(model.1)
p <- predict(model.1, train)
ldahist(data = p$x[,1], g = train$BCHR)
65 + 32
3 * 1.7 - 2
4 * 3 + 6/0.2
5^2
2 > 1
2 >= 1
4 != 4
14 %/% 3
14 %% 3
3 == 3 & 4 != 4
3 == 3 && 4 != 4
is.na(6)
!is.na(6)
# To see the help page for a special character, type a question mark followed by the character in quotation marks and run it
?'%%'
?'%/%'
?"!"
a <- 3
b <- "plant"
1+2
a**3
a^3
x <- c(1,3,5)
log(2)
log10(2)
sqrt(16)
abs(2.25)
round(2.227, digits = 2)
?round
v1 <- c(1, 2, 3)
v2 <- c("A", "B", "C")
(c(v1, v2))
data.frame(v1, v2)
1:10
letters[1:5]
LETTERS[1:5]
which(c(6,2,1,3,4,2,1) > 3)
x <- seq(5, 15, 3)
x
rep(1:4, 3)
rep(1:4, each=3)
rep(1:4, each=2, times=2)
s <- c('male','male','female','male','female')
s == 'male'
x <- matrix(c(1,3,2,1,-1,-2,1,1,1),3,3)
x
solve(x)
round(solve(x) %*%x, 3)
a1 <- sample(1:20, size = 9, replace = FALSE)
a1
a2 <- matrix(a1, nrow = 3)
a2
row.names(a2) <- seq(1:3)
colnames(a2) <- paste0("col", seq(1:3))
head(a2)
col4 <- c(rep("C1", 2),rep("C2", 1)) # create a vector with three elements
b <- data.frame(a2, col4) # add the fourth column to the matrix
head(b)
a <- matrix (1:10 , 2)
b <- as.vector(a)
b
a <- matrix(1:16, 4)
b <- matrix(1:16, 4)
c <- a + b
c <- as.data.frame(c)
c
# The matrix square root can be computed with the expm package as follows:
library(expm)
sqrtm(b)
a <- list(x1 = 'red', x2 = 'green', x3 = c(21,32,11))
a
a[1]
a[3]
a$x3
# An array is essentially a multi-dimensional vector of elements of the same type. Its elements are accessed with single brackets [], where the first and second indices give the row and column and the remaining indices give the outer dimensions. The main difference from a matrix is that a matrix is limited to two dimensions, while an array can have several.
theArray <- array(1:12, dim=c(2, 3, 2))
theArray
theArray[1, , ]
theArray[1, , 1]
theArray[, , 1]
.Library # the default location where packages are installed
.libPaths() # all library paths currently in use
# The following commands show the packages that a given package (e.g. DESeq2) depends on:
if (!require("pacman")) install.packages("pacman")
pacman::p_depends(DESeq2, local = TRUE)
# To cite R or a specific package (e.g. DESeq2), run the following:
citation()
citation("DESeq2")
# library() # list the installed packages
library("MASS") # attach a package
# Installing from Github
library("devtools")
#install_github("yanlinlin82/ggvenn")
# Help of a package
# library(help = e1071)
# Data sets of a package
# data(package = "datasets")
class(iris) # show the class of the data
dim(iris) # number of rows and columns
head(iris) # first six rows
tail(iris) # last six rows
str(iris) # structure of the data
summary(iris) # quantiles of each column
boxplot(iris[,1]) # boxplot of the first column
hist(iris[,1]) # histogram of the first column
# saving image directly in the computer
par(mfrow = c(2,2))
boxplot(iris[,1], col = "cyan")
boxplot(iris[,2], col = "green")
boxplot(iris[,3], col = "magenta")
boxplot(iris[,4], col = "gold")
dev.off()
# Extracting a subset of a data frame
x <- data.frame(col1 =c(1,3,2), col2 =c(1,-2,1), col3 =c(1,1,-1))
row.names(x) <- paste0("row", 21:23)
# Display row row23
x[3,]
x[c("row21", "row23"),]
x[,3]
# Display particular columns while keeping the column format
x[,3, drop = F]
# Flag the rows whose first-column value is greater than or equal to 2
x[,1] >= 2
# Display the rows whose first-column value is greater than or equal to 2
x[x[,1] >= 2,]
# Show the indices of the rows whose first-column value is greater than or equal to 2
which(x[,1] >= 2)
# Display the rows whose sum is greater than or equal to 3
x[rowSums(x) >= 3,]
# Show the index of the row whose name contains 3
grep("3", row.names(x))
# For data frames, the dollar sign can also be used to select columns.
x[x$col1 >= 2,]
# Filtering using the subset function
subset(x, col1 >=2 & col3 < 0)
# Filtering based on a part of the word
library(data.table)
x[rownames(x) %like% "3" | rownames(x) %like% "2",]
library(data.table)
q <- c(2,3)
x[x$col1 %in% q,]
# The rowSums and colSums functions
x <- matrix(c(0,0,0,0,0,0,2,3,4,5,6,9,1,2,3,0),4, byrow = T)
x
# Extract the rows whose sum is greater than zero
x[rowSums(x) > 0,]
rowMeans(x)
# Extract the rows with more than two elements greater than zero
x[rowSums(x > 0) > 2,]
# setwd("C:/Users/user/Desktop/")
# data <- read.csv("C:/Users/user/Desktop/Data.csv", header = TRUE)
# data <- read.table("Data.csv", sep=";", header = TRUE)
library(openxlsx)
# a <- read.xlsx("E:/data.xlsx",2,colIndex = c(1:4), header=T)
# write.table(b, "outputname.txt", quote=F, sep = "\t", na="NA")
a <- matrix (c(5,4,3,6,7,1,2,3,2,8,7,3,9,4,8,1), 4)
a
prod(a[,1]) # product of all the values
rank(a) # ranks of the values of a vector
colnames(a) # view or set the column names of a matrix
sum(a) # sum of the values
sum(a[,3])
sd(a) # standard deviation
sd(a[,4])
sd(a, na.rm = T) # remove missing values before computing
x <- c(2,5,NA,3,7)
is.na(x) # are there NA values in vector x?
any(is.na(x))
which(is.na(x)) # which element is NA? The third!
which(x == 7) # which element of x equals 7? The fifth!
a <- matrix (c(1,1,2,2,7,1,2,3,2,8,7,3,9,4,8,1), 4)
a
apply(a, 2, sum, na.rm = TRUE) # in apply, 2 selects columns and 1 selects rows!
apply(a[,3:4], 2, sum, na.rm = TRUE)
apply(a[,3:4], 2, mean, na.rm = TRUE)
m <- matrix(c(1:10),nrow = 5, ncol = 6)
sq = function(x) x^2
apply(m, c(1,2), sq)
m <- matrix(c(1:10),nrow = 5, ncol = 6)
n <- lapply(m, mean)
n2 <- unlist(n)
n2
d <- as.data.frame(m)
nd <- lapply(d, mean)
nd2 <- unlist(nd)
nd2
sapply(iris[,1:3], mean) # sapply works on data frames and lists.
b <- as.data.frame(a) # in general, any function can be used inside the apply-family functions.
sapply(b[3:4], function(x) x=x*x)
a <- matrix(1:21, 7)
a <- as.data.frame(a)
inf <- function(x) list(mean = mean(x),
se = sd(x)/sqrt(length(x)), length = length(x))
sapply(a, inf)
t(sapply(a, inf))
par(bg = NA)
par(mfrow=c(2,2))
par(mar = c(2, 4.5, 1.8, 1))
hist(a[,1], breaks = 12, main = NULL, xlab = NULL,
col = 'pink')
hist(a[,2], breaks = 12, main = NULL, xlab = NULL,
col = 'pink')
hist(b[,1], breaks = 12, main = NULL, xlab = NULL,
col = 'lightgreen')
hist(b[,2], breaks = 12, main = NULL, xlab = NULL,
col = 'lightgreen')
tapply(iris$Sepal.Length, iris$Species, mean) # mean sepal length for each species
# Computing the mean, standard deviation, and standard error of the columns of a table with the apply functions:
# df <- read.table("1.txt",header=T)
# df
# mean <- apply(df[4:7], 2, FUN = mean, na.rm = T)
# mean
# cbind(mean)
# sd <- sapply(df[4:7], FUN = sd, na.rm = T)
# n <- sapply(df[4:7], FUN = length)
# cbind (mean , sd, n)
library(dplyr)
b <- summarise(group_by(iris, Species),
mPW = mean(Petal.Width), mSW = mean(Sepal.Width),
varPW = var(Petal.Width), varSW = var(Sepal.Width))
b
# Visualization in R
# ggplot(data = <DATA>, mapping = aes(<MAPPINGS>))
# + <GEOM_FUNCTION>()
# a <- read.csv(file = url("http://r-bio.github.io/data/surveys_complete.csv"))
# group_by and summarise
library(dplyr)
b <- summarise(group_by(iris, Species),
m = mean(Petal.Width),
var = var(Petal.Width))
b
# b$species <- as.factor(b$Species)
# library(ggplot2)
# ggplot(b) +
# geom_col(aes(Species, m, fill = species)) +
# geom_errorbar(aes(Species, ymin= m-(var/10), ymax=m+(var/10)), width = 0.2)
# Programming
y <- c()
for (i in 1:5){
y[i] = i^2
}
y
for (i in 1:5) {
i = i^2; print(i)
}
# Note: a control structure, e.g. a condition, can be placed before the desired operation:
for (i in 1:5) if(i > 3){
i = i**2; print(i)
}
# Example:
x <- c(2,4,6,8,10)
y <- c()
for (i in 1:5) {
y[i] = x[i]^2
}
y
# Example:
a <- 1:10
b <- c()
for (i in 1:length(a)){
ifelse(i < 6, b[i] <- a[i]^2, b[i] <- a[i]^4)
}
b
# Example: raising the entries of a matrix to the third power:
a <- matrix(1:9, 3, 3)
b <- matrix(0, 3, 3)
for(i in 1:ncol(a)){
for (j in 1:nrow(a)) {
b[i,j] <- a[i,j]^3
}
}
b
# Example: building a multiplication table:
a <- 1:9
b <- matrix(0, 9, 9)
for(i in 1:9){
for (j in 1:9) {
b[i,j] <- a[i]*a[j]
}
}
b
a <- c("ali", "puya", "saeed", "karim", "jamal")
b <- c(18, 15, 14, 19, 17)
for (i in 1:5) {cat(a[i],"got", b[i], "in Statistics", "\n")}
x = 2
while (x < 100) {x = x^2; print(x)}
# if and else
if (1 == 1) {
print(1)
} else {
print("NotCorrect")
}
# using ifelse
a <- 1:10
ifelse(a%%2 == 0, 'even', 'odd')
# repeat
x <- 10
repeat{x <- x/2; print(x); if(x < 1) break()}
# Writing a function
fact <- function(n){
a = 1
for(i in 1:n)
a = a*i
a
}
fact(5)
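# A recursive variant (added sketch) of the same factorial:
fact2 <- function(n) if (n <= 1) 1 else n * fact2(n - 1)
fact2(5)
## [1] 120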
Number <- function(x){
if(x == 1)
"One"
else if(x == 2)
"Two"
else if(x == 3)
"Three"
else("NotFound")
}
# ifelse
med <- function(x) {
x <- sort(x) # sort first and keep the result
ifelse(length(x)%%2 == 0, (x[(length(x)/2)] + x[(length(x)/2)+1])/2,
x[((length(x)-1)/2)+1])
}
med(c(5,7,9,12,15,3))
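# Base R agrees (added check):
median(c(5,7,9,12,15,3))
## [1] 8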
# converting DNA to RNA
dna2rna <- function(x){
if(!is.character(x))
stop("needs character sequence")
m <- toupper(x)
chartr("T", "U", m)
}
a <- "ttagcggcgaatagcatcgcgaatccctagc"
dna2rna(a)
# Example: a function that applies a given function to the numeric columns of a data frame and skips the character columns:
meanDf <- function(x){
b <- c()
for(i in 1:ncol(x))
if(class(x[,i]) != 'character')
b[i] <- i
b <- na.omit(b)
sapply(x[,b], mean)
}
df <- data.frame(a = 1:5, b = letters[1:5], c = 5)
meanDf(df)
# Restricting the arguments of a function.
agec <- function(age, gender){
if(class(age) != "numeric")
stop("inter your age in the first argument!")
if(gender != "male" & gender != "female")
stop("gender should be male or female!")
if (age > 12)
if (age < 20)
if (gender == "male")
print("You are a teenage boy.")
else
print("You are a teenage girl.")
else
print("You are already an adult.")
else
print("You are still too young.")
}
d <- read.table(text = "
group var1 var2 var3
early.v1 10.59 10.48 4.73
early.v2 11.05 9.02 5.33
early.v3 9.94 10.77 3.46
early.v4 7.99 15.56 5.15
early.v5 9.08 14.76 5.46
late.v1 9.38 14.41 1.46
late.v2 9.52 12.51 3.00
late.v3 10.81 11.12 5.19
late.v4 10.68 9.76 2.25
late.v5 10.95 11.61 3.97", header = TRUE)