Example: Experiment to test plywood strength
# load data
library(tidyverse)
plywood <- read_delim("http://people.bath.ac.uk/kai21/MA50259/Data/plywood.txt",delim = " ")
glimpse(plywood)
## Observations: 60
## Variables: 3
## $ strength <int> 502, 458, 445, 479, 468, 463, 517, 494, 499, 463, 470...
## $ glue <chr> "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B"...
## $ units <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16...
# visualise data
ggplot(plywood,aes(glue, strength)) + geom_point() + xlab("Glue") + ylab("Mean shear strength")
Our own randomised experiment
set.seed(5678) # to control randomness
r <- 10; t <- 6;
levels <- c("level 1","level 2","level 3","level 4","level 5","level 6")
f <- rep(levels, each = r) %>% factor()
n <- r*t
fac <- f %>% sample(size=n) # randomisation
units <- 1:n # unit labels
crd <- tibble(units=units, treatment=fac) # creates data.frame (tibble)
glimpse(crd)
## Observations: 60
## Variables: 2
## $ units <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1...
## $ treatment <fct> level 3, level 3, level 6, level 3, level 6, level 1...
crd <- crd %>% arrange(treatment) # arrange by treatment level.
glimpse(crd)
## Observations: 60
## Variables: 2
## $ units <int> 6, 7, 9, 11, 14, 20, 26, 29, 30, 46, 8, 18, 22, 41, ...
## $ treatment <fct> level 1, level 1, level 1, level 1, level 1, level 1...
Our own CRD experiment, simulating the response
mu <- 500 # overall response
tau <- c(-20,50,0,-30,-10,100) # treatment effects
sd <- 10 # overall sd
means <- mu+tau %>% rep(each = r)
y <- rnorm(n, mean=means,sd=sd) # units are arranged by treatment level
crd$response <- y
glimpse(crd)
## Observations: 60
## Variables: 3
## $ units <int> 6, 7, 9, 11, 14, 20, 26, 29, 30, 46, 8, 18, 22, 41, ...
## $ treatment <fct> level 1, level 1, level 1, level 1, level 1, level 1...
## $ response <dbl> 472.4084, 485.5443, 469.4177, 476.5448, 465.7961, 47...
ggplot(crd,aes(treatment,response)) + geom_point()
Statistical analysis, the treatment effects model
r <- 4; t <- 3; levels <-c("level 1", "level 2", "level 3");
fact <- rep(levels, each = r) %>% factor()
Z <- model.matrix(~ fact); Z
## (Intercept) factlevel 2 factlevel 3
## 1 1 0 0
## 2 1 0 0
## 3 1 0 0
## 4 1 0 0
## 5 1 1 0
## 6 1 1 0
## 7 1 1 0
## 8 1 1 0
## 9 1 0 1
## 10 1 0 1
## 11 1 0 1
## 12 1 0 1
## attr(,"assign")
## [1] 0 1 1
## attr(,"contrasts")
## attr(,"contrasts")$fact
## [1] "contr.treatment"
mod.crd <- lm(response~treatment,data=crd)
summary(mod.crd)
##
## Call:
## lm(formula = response ~ treatment, data = crd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.5035 -6.9354 0.1534 5.5528 19.8164
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 476.313 3.058 155.756 < 2e-16 ***
## treatmentlevel 2 75.387 4.325 17.431 < 2e-16 ***
## treatmentlevel 3 22.758 4.325 5.262 2.51e-06 ***
## treatmentlevel 4 -11.156 4.325 -2.580 0.0126 *
## treatmentlevel 5 4.133 4.325 0.956 0.3435
## treatmentlevel 6 122.037 4.325 28.218 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.67 on 54 degrees of freedom
## Multiple R-squared: 0.9644, Adjusted R-squared: 0.9611
## F-statistic: 292.2 on 5 and 54 DF, p-value: < 2.2e-16
coefs <- coef(mod.crd) # extracts coefficient values only
# reconstruct means from coefficients
taus <- c(0, coefs[2:length(coefs)])
means2<-coefs[1]+taus
means2
## treatmentlevel 2 treatmentlevel 3 treatmentlevel 4
## 476.3130 551.7001 499.0711 465.1571
## treatmentlevel 5 treatmentlevel 6
## 480.4457 598.3501
# compute means from original data
by_group <- group_by(crd, treatment)
summaries.crd<-summarize(by_group, means=mean(response))
glimpse(summaries.crd)
## Observations: 6
## Variables: 2
## $ treatment <fct> level 1, level 2, level 3, level 4, level 5, level 6
## $ means <dbl> 476.3130, 551.7001, 499.0711, 465.1571, 480.4457, 59...
Statistical analysis of plywood experiment: linear model
mod.plywood<-lm(strength ~ glue, data = plywood)
summary(mod.plywood)
##
## Call:
## lm(formula = strength ~ glue, data = plywood)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.40 -17.82 -5.00 14.45 59.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 478.800 8.107 59.063 < 2e-16 ***
## glueB 13.600 11.464 1.186 0.2407
## glueC 24.600 11.464 2.146 0.0364 *
## glueD 50.000 11.464 4.361 5.86e-05 ***
## glueE 99.800 11.464 8.705 7.30e-12 ***
## glueF 117.700 11.464 10.267 2.67e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.64 on 54 degrees of freedom
## Multiple R-squared: 0.7646, Adjusted R-squared: 0.7428
## F-statistic: 35.08 on 5 and 54 DF, p-value: 8.37e-16
coefs <- coef(mod.plywood) # extracts coefficent values only
# reconstruct means from coefficients
taus <- c(0,coefs[2:length(coefs)])
means2<-coefs[1]+taus
means2
## glueB glueC glueD glueE glueF
## 478.8 492.4 503.4 528.8 578.6 596.5
# compute means from original data
by_group <- group_by(plywood, glue)
summaries.plywood<-summarize(by_group, means = mean(strength))
glimpse(summaries.plywood)
## Observations: 6
## Variables: 2
## $ glue <chr> "A", "B", "C", "D", "E", "F"
## $ means <dbl> 478.8, 492.4, 503.4, 528.8, 578.6, 596.5