Loading libraries

library(readxl)
library(ExpDes)
library(agricolae)
library(tidyverse)
library(ggstatsplot)
library(car)
library(patchwork)
library(emmeans)
library(multcomp)

Overview of CRD & RCBD

Completely Randomized Design

Simplest experimental design
Frequently used to compare treatments when environmental conditions are fairly uniform (homogeneous experimental units)
Each treatment is applied at random to several experimental units
If the experimental units are not homogeneous, experimental error would be large and difficult to detect treatment effect
Applicable for experiments in controlled conditions

Randomized Complete Block Design

RCBD is one of the most widely used experimental designs
Suited for field experiments where there exists one extraneous factor which can be used as stratifying variable to form homogeneous experimental units
Blocking is used to minimize variability in experimental units so as to increase precision of the experiment
Units within a block a homogeneous; units between blocks are heterogeneous
Complete set of treatments are assigned independently at random in every block

Analysis of Variance (ANOVA)

A method of partitioning the total variance in the response into different components which can be attributed to different sources
- Systematic variation- effect of manipulated factors
- Random variation- experimental error
Method of comparing the effect of treatments on the response variable
Method of comparing the means of three or more treatments
Model Assumptions:
- Residuals (experimental error) are independently distributed as \(N(0, \sigma_{\epsilon}^2)\)
- Response is linearly related to the treatments
One-way ANOVA for CRD experiments
Two-way ANOVA for RCBD experiments

ANOVA for CRD: An example

#Importing the data
yield_data <- read_excel("Crop yield.xlsx")

ANOVA with post hoc test

with(yield_data, crd(treat = Variety,
                     resp = Yield,
                     quali = TRUE,
                     mcomp = "tukey"))

## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##            DF    SS     MS     Fc     Pr>Fc
## Treatament  3 188.2 62.733 5.6901 0.0075605
## Residuals  16 176.4 11.025                 
## Total      19 364.6                        
## ------------------------------------------------------------------------
## CV = 13.78 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.7255893 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## ------------------------------------------------------------------------
## Homogeneity of variances test
## p-value:  0.7323728 
## According to the test of bartlett at 5% of significance, residuals can be considered homocedastic.
## ------------------------------------------------------------------------
## 
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     B   28.6 
## ab    D   25 
##  b    C   22.4 
##  b    A   20.4 
## ------------------------------------------------------------------------

Presenting results visually

#Run ANOVA the traditional way
mod1 <- aov(Yield ~ Variety,
            data = yield_data)
summary(mod1)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Variety      3  188.2   62.73    5.69 0.00756 **
## Residuals   16  176.4   11.02                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Compute summary statistics by variety
TRT.means <- yield_data %>% 
  group_by(Variety)  %>%  
  dplyr::summarize(Mean=mean(Yield), 
            SD = sd(Yield),
            n = length(Yield))  %>%  
  dplyr::mutate(SE = SD/sqrt(n))

TRT.means

## # A tibble: 4 × 5
##   Variety  Mean    SD     n    SE
##   <chr>   <dbl> <dbl> <int> <dbl>
## 1 A        20.4  3.71     5  1.66
## 2 B        28.6  4.12     5  1.84
## 3 C        22.4  2.70     5  1.21
## 4 D        25    2.47     5  1.10

trt.means <- emmeans(mod1, specs="Variety")
trt.means

##  Variety emmean   SE df lower.CL upper.CL
##  A         20.4 1.48 16     17.3     23.5
##  B         28.6 1.48 16     25.5     31.7
##  C         22.4 1.48 16     19.3     25.5
##  D         25.0 1.48 16     21.9     28.1
## 
## Confidence level used: 0.95

#Add the letters
trt.means.cld <- cld(trt.means, Letters=letters)
trt.means.cld

##  Variety emmean   SE df lower.CL upper.CL .group
##  A         20.4 1.48 16     17.3     23.5  a    
##  C         22.4 1.48 16     19.3     25.5  a    
##  D         25.0 1.48 16     21.9     28.1  ab   
##  B         28.6 1.48 16     25.5     31.7   b   
## 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 4 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

trt.means.cld <- trt.means.cld %>% 
  arrange(Variety)  %>% 
  mutate(SE1=TRT.means$SE)

trt.means.cld

##   Variety emmean       SE df lower.CL upper.CL .group      SE1
## 1       A   20.4 1.484924 16  17.2521  23.5479     a  1.658614
## 2       B   28.6 1.484924 16  25.4521  31.7479      b 1.842010
## 3       C   22.4 1.484924 16  19.2521  25.5479     a  1.207891
## 4       D   25.0 1.484924 16  21.8521  28.1479     ab 1.103177

#Create graphs with SE and letters
ggplot(trt.means.cld, aes(Variety, emmean,label = .group)) + 
  geom_col(fill = "yellow", col="black") +
  geom_errorbar(aes(ymin=emmean - SE1, ymax = emmean + SE1),
                width = 0.2, size = 0.7) +
  geom_text(vjust=-2.1) +
  geom_text(vjust = 4, label = round(trt.means.cld$emmean,2)) +
  labs(x= "variety", y= "Yield") +
  lims(y = c(0,35)) +
  theme_classic()

Analysis of Experiments in RCBD

Importing the data

agrondata <- read_excel("agrondata.xlsx")
head(agrondata) #displays the 1st 6 rows of the data frame

## # A tibble: 6 × 10
##   treatment block   DTE  HT15  HT30  HT45  HT60   DTF   DTM HERBAGE
##       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1         1     1     4  56.2 102.   152.  210.    44    60  158000
## 2         1     2     4  53.9  92.9  140   194.    44    60  192000
## 3         1     3     4  53.4  90.4  134.  189.    44    60  180000
## 4         1     4     4  52.2  89    129.  183.    44    60  155000
## 5         1     5     4  54.6 102.   150.  207.    44    60  185000
## 6         2     1     7  47.3  73.5  131.  173.    76    90  255000

ANOVA for RCBD experiment with post hoc

with(agrondata, rbd(treat = treatment,
                    block = block,
                    resp = HT15,
                    quali = TRUE,
                    mcomp = "tukey"))

## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##            DF      SS      MS      Fc    Pr>Fc
## Treatament  2 2416.67 1208.33 232.588 0.000000
## Block       4   74.27   18.57   3.574 0.059085
## Residuals   8   41.56    5.20                 
## Total      14 2532.50                         
## ------------------------------------------------------------------------
## CV = 5.7 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.1688107 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## ------------------------------------------------------------------------
## Homogeneity of variances test
## p-value:  0.07919401 
## According to the test of oneillmathews at 5% of significance, the variances can be considered homocedastic.
## ------------------------------------------------------------------------
## 
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     1   54.06 
##  b    2   42.6 
##   c   3   23.3 
## ------------------------------------------------------------------------

Presenting the results visually

#Convert integer codes to factors
agrondata <- agrondata %>% 
  mutate(Trt = factor(treatment),
         Blk = factor(block))

#Run ANOVA the traditional way
mod2 <- aov(HT15 ~ Trt + Blk,
            data = agrondata)
summary(mod2)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Trt          2 2416.7  1208.3 232.588 8.17e-08 ***
## Blk          4   74.3    18.6   3.574   0.0591 .  
## Residuals    8   41.6     5.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Compute summary statistics by treatment
TRT.means1 <- agrondata |> 
  group_by(Trt) |> 
  dplyr::summarize(Mean=mean(HT15), 
            SD = sd(HT15),
            n = length(HT15)) |> 
  dplyr::mutate(SE = SD/sqrt(n))

TRT.means1

## # A tibble: 3 × 5
##   Trt    Mean    SD     n    SE
##   <fct> <dbl> <dbl> <int> <dbl>
## 1 1      54.1  1.48     5 0.663
## 2 2      42.6  4.90     5 2.19 
## 3 3      23.3  1.67     5 0.746

trt.means1 <- emmeans(mod2, specs="Trt")
trt.means1

##  Trt emmean   SE df lower.CL upper.CL
##  1     54.1 1.02  8     51.7     56.4
##  2     42.6 1.02  8     40.2     45.0
##  3     23.3 1.02  8     20.9     25.7
## 
## Results are averaged over the levels of: Blk 
## Confidence level used: 0.95

#Add the letters
trt.means.cld1 <- cld(trt.means1,
                     Letters=letters, 
                     decreasing = TRUE)

trt.means.cld1 <- trt.means.cld1 |> 
  arrange(Trt) |> 
  mutate(SE1=TRT.means1$SE)

trt.means.cld1

##   Trt emmean      SE df lower.CL upper.CL .group       SE1
## 1   1  54.06 1.01933  8 51.70942 56.41058    a   0.6630234
## 2   2  42.60 1.01933  8 40.24942 44.95058     b  2.1897488
## 3   3  23.30 1.01933  8 20.94942 25.65058      c 0.7463243

#Create graphs with SE and letters
ggplot(trt.means.cld1, aes(Trt, emmean,label = .group)) + 
  geom_col(fill = "lightblue", col="black") +
  geom_errorbar(aes(ymin=emmean - SE1, ymax = emmean + SE1),
                width = 0.2, size = 0.7) +
  geom_text(vjust=-1.9) +
  geom_text(vjust = 4, label = round(trt.means.cld1$emmean,2)) +
  labs(x= "Treatment", y= "Mean Plant Height (15 days)") +
  lims(y = c(0,60)) +
  theme_classic()

Residual analysis

To check validity of model assumptions

#Extract the residuals
residuals <- resid(mod2)

#Generate the normal probability plot of the residuals
qqnorm(residuals) 

#Draws a reference line
qqline(residuals, col = "red")

#A formal test of normality
shapiro.test(residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.91625, p-value = 0.1688

#Runs Levene's test of equal variance (homogeneity)
leveneTest(residuals ~ Trt, data = agrondata)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  0.4724 0.6346
##       12

Analysis of Basic Experimental Designs Using R

NE Milla, Jr.

2026-02-25

Loading libraries

Overview of CRD & RCBD

Completely Randomized Design

Randomized Complete Block Design

Analysis of Variance (ANOVA)

ANOVA for CRD: An example

ANOVA with post hoc test

Presenting results visually

Analysis of Experiments in RCBD

Importing the data

ANOVA for RCBD experiment with post hoc

Presenting the results visually

Residual analysis