Example: Experiment to test plywood strength

# load data
library(tidyverse)
plywood <- read_delim("http://people.bath.ac.uk/kai21/MA50259/Data/plywood.txt",delim = " ")
glimpse(plywood)
## Observations: 60
## Variables: 3
## $ strength <int> 502, 458, 445, 479, 468, 463, 517, 494, 499, 463, 470...
## $ glue     <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B"...
## $ units    <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
# visualise data
ggplot(plywood,aes(glue, strength)) + geom_point() + xlab("Glue") + ylab("Mean shear strength")

Our own randomised experiment

set.seed(5678) # to control randomness
r <- 10; t <- 6;
levels <- c("level 1","level 2","level 3","level 4","level 5","level 6")
f <- rep(levels, each = r) %>% factor()
n <- r*t
fac <- f %>% sample(size=n) # randomisation
units <- 1:n # unit labels 
crd <- tibble(units=units, treatment=fac) # creates data.frame (tibble)
glimpse(crd)
## Observations: 60
## Variables: 2
## $ units     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ treatment <fct> level 3, level 3, level 6, level 3, level 6, level 1...
crd <- crd %>% arrange(treatment) # arrange by treatment level.
glimpse(crd)
## Observations: 60
## Variables: 2
## $ units     <int> 6, 7, 9, 11, 14, 20, 26, 29, 30, 46, 8, 18, 22, 41, ...
## $ treatment <fct> level 1, level 1, level 1, level 1, level 1, level 1...

Our own CRD experiment, simulating the response

mu <- 500 # overall response
tau <- c(-20,50,0,-30,-10,100) # treatment effects
sd <- 10 # overall sd
means <- mu+tau %>% rep(each = r)
y <- rnorm(n, mean=means,sd=sd) # units are arranged by treatment level
crd$response <- y
glimpse(crd)
## Observations: 60
## Variables: 3
## $ units     <int> 6, 7, 9, 11, 14, 20, 26, 29, 30, 46, 8, 18, 22, 41, ...
## $ treatment <fct> level 1, level 1, level 1, level 1, level 1, level 1...
## $ response  <dbl> 472.4084, 485.5443, 469.4177, 476.5448, 465.7961, 47...
ggplot(crd,aes(treatment,response)) + geom_point()

Statistical analysis, the treatment effects model

r <- 4; t <- 3; levels <-c("level 1", "level 2", "level 3");
fact <- rep(levels, each = r) %>% factor()
Z <- model.matrix(~ fact); Z
##    (Intercept) factlevel 2 factlevel 3
## 1            1           0           0
## 2            1           0           0
## 3            1           0           0
## 4            1           0           0
## 5            1           1           0
## 6            1           1           0
## 7            1           1           0
## 8            1           1           0
## 9            1           0           1
## 10           1           0           1
## 11           1           0           1
## 12           1           0           1
## attr(,"assign")
## [1] 0 1 1
## attr(,"contrasts")
## attr(,"contrasts")$fact
## [1] "contr.treatment"
mod.crd <- lm(response~treatment,data=crd)
summary(mod.crd)
## 
## Call:
## lm(formula = response ~ treatment, data = crd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.5035  -6.9354   0.1534   5.5528  19.8164 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       476.313      3.058 155.756  < 2e-16 ***
## treatmentlevel 2   75.387      4.325  17.431  < 2e-16 ***
## treatmentlevel 3   22.758      4.325   5.262 2.51e-06 ***
## treatmentlevel 4  -11.156      4.325  -2.580   0.0126 *  
## treatmentlevel 5    4.133      4.325   0.956   0.3435    
## treatmentlevel 6  122.037      4.325  28.218  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.67 on 54 degrees of freedom
## Multiple R-squared:  0.9644, Adjusted R-squared:  0.9611 
## F-statistic: 292.2 on 5 and 54 DF,  p-value: < 2.2e-16
coefs <- coef(mod.crd) # extracts coefficient values only
# reconstruct means from coefficients
taus <- c(0, coefs[2:length(coefs)])
means2<-coefs[1]+taus
means2
##                  treatmentlevel 2 treatmentlevel 3 treatmentlevel 4 
##         476.3130         551.7001         499.0711         465.1571 
## treatmentlevel 5 treatmentlevel 6 
##         480.4457         598.3501
# compute means from original data
by_group <- group_by(crd, treatment)
summaries.crd<-summarize(by_group, means=mean(response))
glimpse(summaries.crd)
## Observations: 6
## Variables: 2
## $ treatment <fct> level 1, level 2, level 3, level 4, level 5, level 6
## $ means     <dbl> 476.3130, 551.7001, 499.0711, 465.1571, 480.4457, 59...

Statistical analysis of plywood experiment: linear model

mod.plywood<-lm(strength ~ glue, data = plywood)
summary(mod.plywood)
## 
## Call:
## lm(formula = strength ~ glue, data = plywood)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -44.40 -17.82  -5.00  14.45  59.40 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  478.800      8.107  59.063  < 2e-16 ***
## glueB         13.600     11.464   1.186   0.2407    
## glueC         24.600     11.464   2.146   0.0364 *  
## glueD         50.000     11.464   4.361 5.86e-05 ***
## glueE         99.800     11.464   8.705 7.30e-12 ***
## glueF        117.700     11.464  10.267 2.67e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.64 on 54 degrees of freedom
## Multiple R-squared:  0.7646, Adjusted R-squared:  0.7428 
## F-statistic: 35.08 on 5 and 54 DF,  p-value: 8.37e-16
coefs <- coef(mod.plywood) # extracts coefficent values only
# reconstruct means from coefficients
taus <- c(0,coefs[2:length(coefs)])
means2<-coefs[1]+taus
means2
##       glueB glueC glueD glueE glueF 
## 478.8 492.4 503.4 528.8 578.6 596.5
# compute means from original data
by_group <- group_by(plywood, glue)
summaries.plywood<-summarize(by_group, means = mean(strength))

glimpse(summaries.plywood)
## Observations: 6
## Variables: 2
## $ glue  <chr> "A", "B", "C", "D", "E", "F"
## $ means <dbl> 478.8, 492.4, 503.4, 528.8, 578.6, 596.5