library(tidyverse)
r <- 10
t <- 6
n <- t*r
levels <- c("level 1","level 2","level 3","level 4","level 5","level 6")
levels
## [1] "level 1" "level 2" "level 3" "level 4" "level 5" "level 6"
class(levels)
## [1] "character"
v <- rep(levels,each = r)
v
## [1] "level 1" "level 1" "level 1" "level 1" "level 1" "level 1" "level 1"
## [8] "level 1" "level 1" "level 1" "level 2" "level 2" "level 2" "level 2"
## [15] "level 2" "level 2" "level 2" "level 2" "level 2" "level 2" "level 3"
## [22] "level 3" "level 3" "level 3" "level 3" "level 3" "level 3" "level 3"
## [29] "level 3" "level 3" "level 4" "level 4" "level 4" "level 4" "level 4"
## [36] "level 4" "level 4" "level 4" "level 4" "level 4" "level 5" "level 5"
## [43] "level 5" "level 5" "level 5" "level 5" "level 5" "level 5" "level 5"
## [50] "level 5" "level 6" "level 6" "level 6" "level 6" "level 6" "level 6"
## [57] "level 6" "level 6" "level 6" "level 6"
f <- factor(v)
f
## [1] level 1 level 1 level 1 level 1 level 1 level 1 level 1 level 1
## [9] level 1 level 1 level 2 level 2 level 2 level 2 level 2 level 2
## [17] level 2 level 2 level 2 level 2 level 3 level 3 level 3 level 3
## [25] level 3 level 3 level 3 level 3 level 3 level 3 level 4 level 4
## [33] level 4 level 4 level 4 level 4 level 4 level 4 level 4 level 4
## [41] level 5 level 5 level 5 level 5 level 5 level 5 level 5 level 5
## [49] level 5 level 5 level 6 level 6 level 6 level 6 level 6 level 6
## [57] level 6 level 6 level 6 level 6
## Levels: level 1 level 2 level 3 level 4 level 5 level 6
class(f)
## [1] "factor"
# alternative code 1
f <- factor(rep(levels, each = r))
# nesting the 2 commands avoids the unnecessary intermediate vector v
# alternative code 2
f <- rep(levels, each = r) %>% factor()
# pipe %>% is equivalent to the nested commands above but makes clear what is evaluated first.
set.seed(5678)
1:n # row labels
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50 51 52 53 54 55 56 57 58 59 60
rows <- sample(1:n,n) # random re-shuffling of rows
rows
## [1] 24 26 60 22 58 6 10 19 8 46 4 49 57 7 45 32 28 15 40 9 47 14 50
## [24] 41 37 2 23 35 1 3 54 55 34 59 39 51 43 30 31 27 17 33 20 56 29 5
## [47] 21 38 12 52 44 16 13 11 18 36 42 53 48 25
fac <- f[rows]
fac
## [1] level 3 level 3 level 6 level 3 level 6 level 1 level 1 level 2
## [9] level 1 level 5 level 1 level 5 level 6 level 1 level 5 level 4
## [17] level 3 level 2 level 4 level 1 level 5 level 2 level 5 level 5
## [25] level 4 level 1 level 3 level 4 level 1 level 1 level 6 level 6
## [33] level 4 level 6 level 4 level 6 level 5 level 3 level 4 level 3
## [41] level 2 level 4 level 2 level 6 level 3 level 1 level 3 level 4
## [49] level 2 level 6 level 5 level 2 level 2 level 2 level 2 level 4
## [57] level 5 level 6 level 5 level 3
## Levels: level 1 level 2 level 3 level 4 level 5 level 6
#Â set.seed is used to control randomness if we do not set the seed then we obtain
# different results everytime we run the program.
set.seed(5678)
fac <- sample(f, size = n)
# fac <- f %>% sample(size = n) alternative code.
units <- 1:n # unit labels
crd <- tibble(units = units, treatment = fac) # creates a data.frame
crd
## # A tibble: 60 x 2
## units treatment
## <int> <fct>
## 1 1 level 3
## 2 2 level 3
## 3 3 level 6
## 4 4 level 3
## 5 5 level 6
## 6 6 level 1
## 7 7 level 1
## 8 8 level 2
## 9 9 level 1
## 10 10 level 5
## # ... with 50 more rows
glimpse(crd)
## Observations: 60
## Variables: 2
## $ units <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ treatment <fct> level 3, level 3, level 6, level 3, level 6, level 1...
crd <- arrange(crd, treatment) # arrange by treatment level
# alternative code
# crd <- crd %>% arrange(treatment)
glimpse(crd)
## Observations: 60
## Variables: 2
## $ units <int> 6, 7, 9, 11, 14, 20, 26, 29, 30, 46, 8, 18, 22, 41, ...
## $ treatment <fct> level 1, level 1, level 1, level 1, level 1, level 1...
crd1 <- tibble(units=units, treatment=f)
crd1 < crd1[rows,]
## Warning in Ops.factor(left, right): '<' not meaningful for factors
## units treatment
## [1,] TRUE NA
## [2,] TRUE NA
## [3,] TRUE NA
## [4,] TRUE NA
## [5,] TRUE NA
## [6,] FALSE NA
## [7,] TRUE NA
## [8,] TRUE NA
## [9,] FALSE NA
## [10,] TRUE NA
## [11,] FALSE NA
## [12,] TRUE NA
## [13,] TRUE NA
## [14,] FALSE NA
## [15,] TRUE NA
## [16,] TRUE NA
## [17,] TRUE NA
## [18,] FALSE NA
## [19,] TRUE NA
## [20,] FALSE NA
## [21,] TRUE NA
## [22,] FALSE NA
## [23,] TRUE NA
## [24,] TRUE NA
## [25,] TRUE NA
## [26,] FALSE NA
## [27,] FALSE NA
## [28,] TRUE NA
## [29,] FALSE NA
## [30,] FALSE NA
## [31,] TRUE NA
## [32,] TRUE NA
## [33,] TRUE NA
## [34,] TRUE NA
## [35,] TRUE NA
## [36,] TRUE NA
## [37,] TRUE NA
## [38,] FALSE NA
## [39,] FALSE NA
## [40,] FALSE NA
## [41,] FALSE NA
## [42,] FALSE NA
## [43,] FALSE NA
## [44,] TRUE NA
## [45,] FALSE NA
## [46,] FALSE NA
## [47,] FALSE NA
## [48,] FALSE NA
## [49,] FALSE NA
## [50,] TRUE NA
## [51,] FALSE NA
## [52,] FALSE NA
## [53,] FALSE NA
## [54,] FALSE NA
## [55,] FALSE NA
## [56,] FALSE NA
## [57,] FALSE NA
## [58,] FALSE NA
## [59,] FALSE NA
## [60,] FALSE NA
crd1
## # A tibble: 60 x 2
## units treatment
## <int> <fct>
## 1 1 level 1
## 2 2 level 1
## 3 3 level 1
## 4 4 level 1
## 5 5 level 1
## 6 6 level 1
## 7 7 level 1
## 8 8 level 1
## 9 9 level 1
## 10 10 level 1
## # ... with 50 more rows
crd1 <- crd1 %>% arrange(treatment)
crd1
## # A tibble: 60 x 2
## units treatment
## <int> <fct>
## 1 1 level 1
## 2 2 level 1
## 3 3 level 1
## 4 4 level 1
## 5 5 level 1
## 6 6 level 1
## 7 7 level 1
## 8 8 level 1
## 9 9 level 1
## 10 10 level 1
## # ... with 50 more rows
mu <- 500 # overall mean response
tau <- c(-20,50,0,-30,-10,100) # treatment effects over mu
means <- mu + tau # mean response for each treatment level
means
## [1] 480 550 500 470 490 600
means <- mu+tau %>% rep(each=r) # vector of means
# means <- rep(mu+tau, each = r) # alternative code
means
## [1] 480 480 480 480 480 480 480 480 480 480 550 550 550 550 550 550 550
## [18] 550 550 550 500 500 500 500 500 500 500 500 500 500 470 470 470 470
## [35] 470 470 470 470 470 470 490 490 490 490 490 490 490 490 490 490 600
## [52] 600 600 600 600 600 600 600 600 600
sd <- 10 # common sd
y <- rnorm(n, mean=means, sd=sd) # note the units should be arranged by treatment level.
y
## [1] 472.4084 485.5443 469.4177 476.5448 465.7961 478.7417 467.5876
## [8] 493.3207 478.1360 475.6323 556.9446 529.1966 547.6057 544.2420
## [15] 549.0948 561.5796 560.7574 561.1673 551.7749 554.6377 492.2205
## [22] 493.1161 498.0180 496.2580 485.5763 508.7216 504.7826 501.5454
## [29] 515.4524 495.0196 461.8719 458.5625 472.9068 459.4350 456.0150
## [36] 480.9937 465.8724 468.7799 456.7203 470.4136 489.8517 465.4183
## [43] 468.4639 484.2266 495.4763 485.2693 485.9455 473.3897 471.6645
## [50] 484.7507 599.8706 591.0384 618.1664 607.3093 614.9173 581.2274
## [57] 587.9494 596.5072 599.7507 586.7642
crd1$response<-y
glimpse(crd1)
## Observations: 60
## Variables: 3
## $ units <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ treatment <fct> level 1, level 1, level 1, level 1, level 1, level 1...
## $ response <dbl> 472.4084, 485.5443, 469.4177, 476.5448, 465.7961, 47...
ggplot(crd1, aes(treatment, response)) + geom_point()
ggplot(crd1, aes(treatment, response)) + geom_boxplot()
17. Analyse the data using a linear model via the command in R
mod.crd1 <- lm(response~treatment, data=crd1)
coefs <- coef(mod.crd1)
coefs
## (Intercept) treatmentlevel 2 treatmentlevel 3 treatmentlevel 4
## 476.312968 75.387111 22.758085 -11.155876
## treatmentlevel 5 treatmentlevel 6
## 4.132686 122.037112
The estimates given, correspond to estimates of \(\mu\) \(\tau_2\) \(\tau_3\) \(\tau_4\) \(\tau_5\) and \(\tau_6\) so you can compare them with the actual population values.
c(mu, tau[-1])
## [1] 500 50 0 -30 -10 100
taus <- c(0, coefs[-1])
means2<-coefs[1]+taus
means2
## treatmentlevel 2 treatmentlevel 3 treatmentlevel 4
## 476.3130 551.7001 499.0711 465.1571
## treatmentlevel 5 treatmentlevel 6
## 480.4457 598.3501
You can compare them with the actual population values
mu+tau
## [1] 480 550 500 470 490 600
by_group <- group_by(crd1, treatment) # groups the data.frame
by_group
## # A tibble: 60 x 3
## # Groups: treatment [6]
## units treatment response
## <int> <fct> <dbl>
## 1 1 level 1 472.
## 2 2 level 1 486.
## 3 3 level 1 469.
## 4 4 level 1 477.
## 5 5 level 1 466.
## 6 6 level 1 479.
## 7 7 level 1 468.
## 8 8 level 1 493.
## 9 9 level 1 478.
## 10 10 level 1 476.
## # ... with 50 more rows
summaries.crd<-summarize(by_group, means = mean(response))
# applies the mean function for each group defined by the levels of the treatment
# factor
glimpse(summaries.crd)
## Observations: 6
## Variables: 2
## $ treatment <fct> level 1, level 2, level 3, level 4, level 5, level 6
## $ means <dbl> 476.3130, 551.7001, 499.0711, 465.1571, 480.4457, 59...