Loading libraries

library(readxl)
library(ExpDes)
library(agricolae)
library(tidyverse)
library(ggstatsplot)
library(car)
library(patchwork)
library(emmeans)
library(multcomp)

Overview of CRD & RCBD

Completely Randomized Design

  • Simplest experimental design

  • Frequently used to compare treatments when environmental conditions are fairly uniform (homogeneous experimental units)

  • Each treatment is applied at random to several experimental units

  • If the experimental units are not homogeneous, experimental error would be large and difficult to detect treatment effect

  • Applicable for experiments in controlled conditions

Randomized Complete Block Design

  • RCBD is one of the most widely used experimental designs

  • Suited for field experiments where there exists one extraneous factor which can be used as stratifying variable to form homogeneous experimental units

  • Blocking is used to minimize variability in experimental units so as to increase precision of the experiment

  • Units within a block a homogeneous; units between blocks are heterogeneous

  • Complete set of treatments are assigned independently at random in every block

Analysis of Variance (ANOVA)

ANOVA for CRD: An example

#Importing the data
yield_data <- read_excel("Crop yield.xlsx")

ANOVA with post hoc test

with(yield_data, crd(treat = Variety,
                     resp = Yield,
                     quali = TRUE,
                     mcomp = "tukey"))
## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##            DF    SS     MS     Fc     Pr>Fc
## Treatament  3 188.2 62.733 5.6901 0.0075605
## Residuals  16 176.4 11.025                 
## Total      19 364.6                        
## ------------------------------------------------------------------------
## CV = 13.78 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.7255893 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## ------------------------------------------------------------------------
## Homogeneity of variances test
## p-value:  0.7323728 
## According to the test of bartlett at 5% of significance, residuals can be considered homocedastic.
## ------------------------------------------------------------------------
## 
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     B   28.6 
## ab    D   25 
##  b    C   22.4 
##  b    A   20.4 
## ------------------------------------------------------------------------

Presenting results visually

#Run ANOVA the traditional way
mod1 <- aov(Yield ~ Variety,
            data = yield_data)
summary(mod1)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Variety      3  188.2   62.73    5.69 0.00756 **
## Residuals   16  176.4   11.02                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Compute summary statistics by variety
TRT.means <- yield_data %>% 
  group_by(Variety)  %>%  
  dplyr::summarize(Mean=mean(Yield), 
            SD = sd(Yield),
            n = length(Yield))  %>%  
  dplyr::mutate(SE = SD/sqrt(n))

TRT.means
## # A tibble: 4 × 5
##   Variety  Mean    SD     n    SE
##   <chr>   <dbl> <dbl> <int> <dbl>
## 1 A        20.4  3.71     5  1.66
## 2 B        28.6  4.12     5  1.84
## 3 C        22.4  2.70     5  1.21
## 4 D        25    2.47     5  1.10
trt.means <- emmeans(mod1, specs="Variety")
trt.means
##  Variety emmean   SE df lower.CL upper.CL
##  A         20.4 1.48 16     17.3     23.5
##  B         28.6 1.48 16     25.5     31.7
##  C         22.4 1.48 16     19.3     25.5
##  D         25.0 1.48 16     21.9     28.1
## 
## Confidence level used: 0.95
#Add the letters
trt.means.cld <- cld(trt.means, Letters=letters)
trt.means.cld
##  Variety emmean   SE df lower.CL upper.CL .group
##  A         20.4 1.48 16     17.3     23.5  a    
##  C         22.4 1.48 16     19.3     25.5  a    
##  D         25.0 1.48 16     21.9     28.1  ab   
##  B         28.6 1.48 16     25.5     31.7   b   
## 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.
trt.means.cld <- trt.means.cld %>% 
  arrange(Variety)  %>% 
  mutate(SE1=TRT.means$SE)

trt.means.cld
##   Variety emmean       SE df lower.CL upper.CL .group      SE1
## 1       A   20.4 1.484924 16  17.2521  23.5479     a  1.658614
## 2       B   28.6 1.484924 16  25.4521  31.7479      b 1.842010
## 3       C   22.4 1.484924 16  19.2521  25.5479     a  1.207891
## 4       D   25.0 1.484924 16  21.8521  28.1479     ab 1.103177
#Create graphs with SE and letters
ggplot(trt.means.cld, aes(Variety, emmean,label = .group)) + 
  geom_col(fill = "yellow", col="black") +
  geom_errorbar(aes(ymin=emmean - SE1, ymax = emmean + SE1),
                width = 0.2, size = 0.7) +
  geom_text(vjust=-2.1) +
  geom_text(vjust = 4, label = round(trt.means.cld$emmean,2)) +
  labs(x= "variety", y= "Yield") +
  lims(y = c(0,35)) +
  theme_classic()

Analysis of Experiments in RCBD

Importing the data

agrondata <- read_excel("agrondata.xlsx")
head(agrondata) #displays the 1st 6 rows of the data frame
## # A tibble: 6 × 10
##   treatment block   DTE  HT15  HT30  HT45  HT60   DTF   DTM HERBAGE
##       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1         1     1     4  56.2 102.   152.  210.    44    60  158000
## 2         1     2     4  53.9  92.9  140   194.    44    60  192000
## 3         1     3     4  53.4  90.4  134.  189.    44    60  180000
## 4         1     4     4  52.2  89    129.  183.    44    60  155000
## 5         1     5     4  54.6 102.   150.  207.    44    60  185000
## 6         2     1     7  47.3  73.5  131.  173.    76    90  255000

ANOVA for RCBD experiment with post hoc

with(agrondata, rbd(treat = treatment,
                    block = block,
                    resp = HT15,
                    quali = TRUE,
                    mcomp = "tukey"))
## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##            DF      SS      MS      Fc    Pr>Fc
## Treatament  2 2416.67 1208.33 232.588 0.000000
## Block       4   74.27   18.57   3.574 0.059085
## Residuals   8   41.56    5.20                 
## Total      14 2532.50                         
## ------------------------------------------------------------------------
## CV = 5.7 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.1688107 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## ------------------------------------------------------------------------
## Homogeneity of variances test
## p-value:  0.07919401 
## According to the test of oneillmathews at 5% of significance, the variances can be considered homocedastic.
## ------------------------------------------------------------------------
## 
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     1   54.06 
##  b    2   42.6 
##   c   3   23.3 
## ------------------------------------------------------------------------

Presenting the results visually

#Convert integer codes to factors
agrondata <- agrondata %>% 
  mutate(Trt = factor(treatment),
         Blk = factor(block))

#Run ANOVA the traditional way
mod2 <- aov(HT15 ~ Trt + Blk,
            data = agrondata)
summary(mod2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Trt          2 2416.7  1208.3 232.588 8.17e-08 ***
## Blk          4   74.3    18.6   3.574   0.0591 .  
## Residuals    8   41.6     5.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Compute summary statistics by treatment
TRT.means1 <- agrondata |> 
  group_by(Trt) |> 
  dplyr::summarize(Mean=mean(HT15), 
            SD = sd(HT15),
            n = length(HT15)) |> 
  dplyr::mutate(SE = SD/sqrt(n))

TRT.means1
## # A tibble: 3 × 5
##   Trt    Mean    SD     n    SE
##   <fct> <dbl> <dbl> <int> <dbl>
## 1 1      54.1  1.48     5 0.663
## 2 2      42.6  4.90     5 2.19 
## 3 3      23.3  1.67     5 0.746
trt.means1 <- emmeans(mod2, specs="Trt")
trt.means1
##  Trt emmean   SE df lower.CL upper.CL
##  1     54.1 1.02  8     51.7     56.4
##  2     42.6 1.02  8     40.2     45.0
##  3     23.3 1.02  8     20.9     25.7
## 
## Results are averaged over the levels of: Blk 
## Confidence level used: 0.95
#Add the letters
trt.means.cld1 <- cld(trt.means1,
                     Letters=letters, 
                     decreasing = TRUE)

trt.means.cld1 <- trt.means.cld1 |> 
  arrange(Trt) |> 
  mutate(SE1=TRT.means1$SE)

trt.means.cld1
##   Trt emmean      SE df lower.CL upper.CL .group       SE1
## 1   1  54.06 1.01933  8 51.70942 56.41058    a   0.6630234
## 2   2  42.60 1.01933  8 40.24942 44.95058     b  2.1897488
## 3   3  23.30 1.01933  8 20.94942 25.65058      c 0.7463243
#Create graphs with SE and letters
ggplot(trt.means.cld1, aes(Trt, emmean,label = .group)) + 
  geom_col(fill = "lightblue", col="black") +
  geom_errorbar(aes(ymin=emmean - SE1, ymax = emmean + SE1),
                width = 0.2, size = 0.7) +
  geom_text(vjust=-1.9) +
  geom_text(vjust = 4, label = round(trt.means.cld1$emmean,2)) +
  labs(x= "Treatment", y= "Mean Plant Height (15 days)") +
  lims(y = c(0,60)) +
  theme_classic()

Residual analysis

#Extract the residuals
residuals <- resid(mod2)

#Generate the normal probability plot of the residuals
qqnorm(residuals) 

#Draws a reference line
qqline(residuals, col = "red") 

#A formal test of normality
shapiro.test(residuals) 
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.91625, p-value = 0.1688
#Runs Levene's test of equal variance (homogeneity)
leveneTest(residuals ~ Trt, data = agrondata)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.4724 0.6346
##       12