Loading libraries

library(readxl)
library(ExpDes)
library(tidyverse)
library(ggstatsplot)
library(ISLR)
library(patchwork)
library(emmeans)
library(multcomp)
library(multcompView)
library(ggpubr)
library(rstatix)
library(Hmisc)
library(GGally)
library(agricolae)

Analysis of Experiments in RCBD

Importing the data (Make sure to set the appropriate working directory)

agrondata <- read_excel("agrondata.xlsx")
head(agrondata) #displays the 1st 6 rows of the data frame

## # A tibble: 6 × 10
##   treatment block   DTE  HT15  HT30  HT45  HT60   DTF   DTM HERBAGE
##       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1         1     1     4  56.2 102.   152.  210.    44    60  158000
## 2         1     2     4  53.9  92.9  140   194.    44    60  192000
## 3         1     3     4  53.4  90.4  134.  189.    44    60  180000
## 4         1     4     4  52.2  89    129.  183.    44    60  155000
## 5         1     5     4  54.6 102.   150.  207.    44    60  185000
## 6         2     1     7  47.3  73.5  131.  173.    76    90  255000

ANOVA for RCBD experiment with post hoc

with(agrondata, rbd(treat = treatment,
                    block = block,
                    resp = HT15,
                    quali = TRUE,
                    mcomp = "tukey"))

## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##            DF      SS      MS      Fc    Pr>Fc
## Treatament  2 2416.67 1208.33 232.588 0.000000
## Block       4   74.27   18.57   3.574 0.059085
## Residuals   8   41.56    5.20                 
## Total      14 2532.50                         
## ------------------------------------------------------------------------
## CV = 5.7 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.1688107 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## ------------------------------------------------------------------------
## Homogeneity of variances test
## p-value:  0.07919401 
## According to the test of oneillmathews at 5% of significance, the variances can be considered homocedastic.
## ------------------------------------------------------------------------
## 
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     1   54.06 
##  b    2   42.6 
##   c   3   23.3 
## ------------------------------------------------------------------------

Presenting the results visually

#Run ANOVA the traditional way
agrondata$Trt <- as.factor(agrondata$treatment)
agrondata$Blk <- as.factor(agrondata$block)
head(agrondata)

## # A tibble: 6 × 12
##   treatment block   DTE  HT15  HT30  HT45  HT60   DTF   DTM HERBAGE Trt   Blk  
##       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <fct> <fct>
## 1         1     1     4  56.2 102.   152.  210.    44    60  158000 1     1    
## 2         1     2     4  53.9  92.9  140   194.    44    60  192000 1     2    
## 3         1     3     4  53.4  90.4  134.  189.    44    60  180000 1     3    
## 4         1     4     4  52.2  89    129.  183.    44    60  155000 1     4    
## 5         1     5     4  54.6 102.   150.  207.    44    60  185000 1     5    
## 6         2     1     7  47.3  73.5  131.  173.    76    90  255000 2     1

out1 <- aov(HT15 ~ Trt + Blk,
            data = agrondata)
anova(out1)

## Analysis of Variance Table
## 
## Response: HT15
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Trt        2 2416.67 1208.33 232.588 8.171e-08 ***
## Blk        4   74.27   18.57   3.574   0.05908 .  
## Residuals  8   41.56    5.20                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TRT.means <- agrondata |> 
  group_by(treatment) |> 
  dplyr::summarize(Mean=mean(HT15), 
            SD = sd(HT15),
            n = length(HT15)) |> 
  dplyr::mutate(SE = SD/sqrt(n))

TRT.means

## # A tibble: 3 × 5
##   treatment  Mean    SD     n    SE
##       <dbl> <dbl> <dbl> <int> <dbl>
## 1         1  54.1  1.48     5 0.663
## 2         2  42.6  4.90     5 2.19 
## 3         3  23.3  1.67     5 0.746

#Compute summary statistics by Stocks
trt.means <- emmeans(out1, specs="Trt")
trt.means

##  Trt emmean   SE df lower.CL upper.CL
##  1     54.1 1.02  8     51.7     56.4
##  2     42.6 1.02  8     40.2     45.0
##  3     23.3 1.02  8     20.9     25.7
## 
## Results are averaged over the levels of: Blk 
## Confidence level used: 0.95

#Add the letters
trt.means.cld <- cld(trt.means,
                     Letters=letters, 
                     decreasing = TRUE)
trt.means.cld <- trt.means.cld |> 
  arrange(Trt) |> 
  mutate(SE1=TRT.means$SE)

trt.means.cld

##   Trt emmean      SE df lower.CL upper.CL .group       SE1
## 1   1  54.06 1.01933  8 51.70942 56.41058    a   0.6630234
## 2   2  42.60 1.01933  8 40.24942 44.95058     b  2.1897488
## 3   3  23.30 1.01933  8 20.94942 25.65058      c 0.7463243

#Create graphs with SE and letters
ggplot(trt.means.cld, aes(Trt, emmean,label = .group)) + 
  geom_col(fill = "pink", col="black") +
  geom_errorbar(aes(ymin = emmean - SE1, 
                    ymax = emmean + SE1),
                width = 0.2, 
                size = 0.7) +
  geom_text(vjust=-1.5, size = 5) +
  geom_text(vjust = 3, label = round(trt.means.cld$emmean,2)) +
  labs(x= "Treatment", y= "Mean Plant Height (15 days)") +
  lims(y = c(0,60)) +
  theme_classic()

Analysis of Factorial Experiments in CRD

Importing the data

fact_crd <- read_excel("Factorial.xlsx",
                       sheet = "CRD")

#Display the first 6 rows of the data frame
head(fact_crd)

## # A tibble: 6 × 4
##       A     B   Rep Height
##   <dbl> <dbl> <dbl>  <dbl>
## 1     1     1     1   46.5
## 2     1     1     2   55.9
## 3     1     1     3   78.7
## 4     1     2     1   49.5
## 5     1     2     2   59.5
## 6     1     2     3   78.7

ANOVA Table

with(fact_crd,fat2.crd(factor1 = A,
                       factor2 = B,
                       resp = Height,
                       quali = c(TRUE,TRUE),
                       mcomp = "tukey",
                       fac.names = c("Spacing", "Age"),
                       ))

## ------------------------------------------------------------------------
## Legend:
## FACTOR 1:  Spacing 
## FACTOR 2:  Age 
## ------------------------------------------------------------------------
## 
## 
## Analysis of Variance Table
## ------------------------------------------------------------------------
##             DF      SS MS      Fc    Pr>Fc
## Spacing      1   409.0  3  1.5207 0.241120
## Age          2 12846.3  5 23.8835 0.000066
## Spacing*Age  2   996.6  4  1.8529 0.198942
## Residuals   12  3227.2  2                 
## Total       17 17479.1  1                 
## ------------------------------------------------------------------------
## CV = 20.36 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.4529232 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## No significant interaction: analyzing the simple effect
## ------------------------------------------------------------------------
## Spacing
## According to the F test, the means of this factor are statistical equal.
## ------------------------------------------------------------------------
##   Levels    Means
## 1      1 85.30000
## 2      2 75.76667
## ------------------------------------------------------------------------
## Age
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     3   118.0833 
##  b    2   65.36667 
##  b    1   58.15 
## ------------------------------------------------------------------------

Graphical presentation

#Create the ANOVA model
fact_crd$Spacing = as.factor(fact_crd$A)
fact_crd$Age = as.factor(fact_crd$B)
head(fact_crd)

## # A tibble: 6 × 6
##       A     B   Rep Height Spacing Age  
##   <dbl> <dbl> <dbl>  <dbl> <fct>   <fct>
## 1     1     1     1   46.5 1       1    
## 2     1     1     2   55.9 1       1    
## 3     1     1     3   78.7 1       1    
## 4     1     2     1   49.5 1       2    
## 5     1     2     2   59.5 1       2    
## 6     1     2     3   78.7 1       2

#Create the ANOVA model
fact_mod1 <- aov(Height ~ Spacing + Age + Spacing:Age,
                 data = fact_crd)

#Display the ANOVA table
summary(fact_mod1)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Spacing      1    409     409   1.521    0.241    
## Age          2  12846    6423  23.883 6.55e-05 ***
## Spacing:Age  2    997     498   1.853    0.199    
## Residuals   12   3227     269                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

fact_crd <- fact_crd |> 
  group_by(Age) |> 
  dplyr::summarize(Mean = mean(Height),
            SD = sd(Height),
            n = length(Height)) |> 
  dplyr::mutate(SE = SD/sqrt(n))
 
fact_crd

## # A tibble: 3 × 5
##   Age    Mean    SD     n    SE
##   <fct> <dbl> <dbl> <int> <dbl>
## 1 1      58.2  12.0     6  4.89
## 2 2      65.4  10.4     6  4.24
## 3 3     118.   26.0     6 10.6

#Generate the summary statistics for the levels of Factor B
B.means <- emmeans(fact_mod1, specs="Age")
B.means#Print the table of means

##  Age emmean   SE df lower.CL upper.CL
##  1     58.1 6.69 12     43.6     72.7
##  2     65.4 6.69 12     50.8     80.0
##  3    118.1 6.69 12    103.5    132.7
## 
## Results are averaged over the levels of: Spacing 
## Confidence level used: 0.95

#Assign the letter designations to the means of the Factor B
B.means.cld <- cld(B.means,
                   Letters=letters) |> 
  arrange(Age) |> 
  mutate(SE1 = fact_crd$SE)
B.means.cld #Print the table of means

##   Age    emmean       SE df  lower.CL  upper.CL .group       SE1
## 1   1  58.15000 6.694975 12  43.56290  72.73710     a   4.888882
## 2   2  65.36667 6.694975 12  50.77957  79.95376     a   4.237269
## 3   3 118.08333 6.694975 12 103.49624 132.67043      b 10.610008

#Generate the two-way plot
ggplot(B.means.cld, aes(Age, emmean,label = .group)) + 
  geom_col(fill="gray") +
  geom_errorbar(aes(ymin = emmean - SE1, 
                    ymax = emmean + SE1),
                width = 0.2, size = 0.7) +
  geom_text(vjust=-3.2, size = 5) +
  geom_text(label=round(B.means.cld$emmean,2),vjust=5, color = "black") +
  lims(y = c(0,160)) +
  labs(x= "Age", y= "Mean Height") +
  theme_classic()

Analysis of Factorial Experiments in RCBD

Importing the data into R

fact_rbd <- read_excel("Factorial.xlsx",
                       sheet = "RCBD")
head(fact_rbd)

## # A tibble: 6 × 4
##   Block     P     N Yield
##   <dbl> <dbl> <dbl> <dbl>
## 1     1     1     1   0.9
## 2     1     1     2   1.2
## 3     1     1     3   1.3
## 4     1     1     4   1.8
## 5     1     1     5   1.1
## 6     1     2     1   0.9

Factorial ANOVA (RCBD)

with(fact_rbd,fat2.rbd(factor1 = P, 
                       factor2 = N,
                       block = Block, 
                       resp = Yield,
                       quali = c(TRUE,TRUE),
                       fac.names = c("Phosporous","Nitrogen"),
                       mcomp="tukey"))

## ------------------------------------------------------------------------
## Legend:
## FACTOR 1:  Phosporous 
## FACTOR 2:  Nitrogen 
## ------------------------------------------------------------------------
## 
## 
## Analysis of Variance Table
## ------------------------------------------------------------------------
##                     DF     SS MS     Fc    Pr>Fc
## Block                2 0.0640  3  1.631 0.213772
## Phosporous           2 0.1693  4  4.316 0.023244
## Nitrogen             4 2.4902  6 31.732 0.000000
## Phosporous*Nitrogen  8 1.0151  5  6.468 0.000090
## Residuals           28 0.5493  2                
## Total               44 4.2880  1                
## ------------------------------------------------------------------------
## CV = 11.12 %
## 
## ------------------------------------------------------------------------
## Shapiro-Wilk normality test
## p-value:  0.1377132 
## According to Shapiro-Wilk normality test at 5% of significance, residuals can be considered normal.
## ------------------------------------------------------------------------
## 
## 
## 
## Significant interaction: analyzing the interaction
## ------------------------------------------------------------------------
## 
## Analyzing  Phosporous  inside of each level of  Nitrogen 
## ------------------------------------------------------------------------
## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##                       DF      SS      MS      Fc  Pr.Fc
## Block                  2 0.06400   0.032  1.6311 0.2138
## Nitrogen               4 2.49022 0.62256 31.7322      0
## Phosporous:Nitrogen 1  2 0.01556 0.00778  0.3964 0.6764
## Phosporous:Nitrogen 2  2 0.12667 0.06333  3.2282 0.0548
## Phosporous:Nitrogen 3  2 0.01556 0.00778  0.3964 0.6764
## Phosporous:Nitrogen 4  2 0.62000    0.31  15.801      0
## Phosporous:Nitrogen 5  2 0.40667 0.20333 10.3641  4e-04
## Residuals             28 0.54933 0.01962               
## Total                 44 4.28800                       
## ------------------------------------------------------------------------
## 
## 
## 
##  Phosporous  inside of the level  1  of  Nitrogen 
## 
## According to the F test, the means of this factor are statistical equal.
## ------------------------------------------------------------------------
##     Levels     Means
## 1        1 0.9333333
## 2        2 0.8333333
## 3        3 0.8666667
## ------------------------------------------------------------------------
## 
## 
##  Phosporous  inside of the level  2  of  Nitrogen 
## 
## According to the F test, the means of this factor are statistical equal.
## ------------------------------------------------------------------------
##     Levels     Means
## 1        1 1.2333333
## 2        2 0.9666667
## 3        3 1.2000000
## ------------------------------------------------------------------------
## 
## 
##  Phosporous  inside of the level  3  of  Nitrogen 
## 
## According to the F test, the means of this factor are statistical equal.
## ------------------------------------------------------------------------
##     Levels     Means
## 1        1  1.400000
## 2        2  1.300000
## 3        3  1.366667
## ------------------------------------------------------------------------
## 
## 
##  Phosporous  inside of the level  4  of  Nitrogen 
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     1   1.933333 
##  b    3   1.433333 
##  b    2   1.333333 
## ------------------------------------------------------------------------
## 
## 
##  Phosporous  inside of the level  5  of  Nitrogen 
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     2   1.666667 
##  b    1   1.233333 
##  b    3   1.2 
## ------------------------------------------------------------------------
## 
## 
## 
## Analyzing  Nitrogen  inside of each level of  Phosporous 
## ------------------------------------------------------------------------
## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##                       DF      SS      MS      Fc  Pr.Fc
## Block                  2 0.06400   0.032  1.6311 0.2138
## Phosporous             2 0.16933 0.08467  4.3155 0.0232
## Nitrogen:Phosporous 1  4 1.63067 0.40767 20.7791      0
## Nitrogen:Phosporous 2  4 1.29733 0.32433 16.5316      0
## Nitrogen:Phosporous 3  4 0.57733 0.14433  7.3568  4e-04
## Residuals             28 0.54933 0.01962               
## Total                 44 4.28800                       
## ------------------------------------------------------------------------
## 
## 
## 
##  Nitrogen  inside of the level  1  of  Phosporous 
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     4   1.933333 
##  b    3   1.4 
##  bc   2   1.233333 
##  bc   5   1.233333 
##   c   1   0.9333333 
## ------------------------------------------------------------------------
## 
## 
##  Nitrogen  inside of the level  2  of  Phosporous 
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     5   1.666667 
##  b    4   1.333333 
##  b    3   1.3 
##   c   2   0.9666667 
##   c   1   0.8333333 
## ------------------------------------------------------------------------
## 
## 
##  Nitrogen  inside of the level  3  of  Phosporous 
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     4   1.433333 
## a     3   1.366667 
## a     2   1.2 
## a     5   1.2 
##  b    1   0.8666667 
## ------------------------------------------------------------------------

Visualizing the results

#Converting the integer codes to factors
fact_rbd$Block = factor(fact_rbd$Block)
fact_rbd$P = factor(fact_rbd$P)
fact_rbd$N = factor(fact_rbd$N)
head(fact_rbd)

## # A tibble: 6 × 4
##   Block P     N     Yield
##   <fct> <fct> <fct> <dbl>
## 1 1     1     1       0.9
## 2 1     1     2       1.2
## 3 1     1     3       1.3
## 4 1     1     4       1.8
## 5 1     1     5       1.1
## 6 1     2     1       0.9

#Create the ANOVA model
fact_mod2 <- aov(Yield ~ Block + P + N + P:N,
                 data = fact_rbd)

#Display the ANOVA table
anova(fact_mod2)

## Analysis of Variance Table
## 
## Response: Yield
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Block      2 0.06400 0.03200  1.6311   0.21377    
## P          2 0.16933 0.08467  4.3155   0.02324 *  
## N          4 2.49022 0.62256 31.7322 4.946e-10 ***
## P:N        8 1.01511 0.12689  6.4676 8.979e-05 ***
## Residuals 28 0.54933 0.01962                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

pn.means <- fact_rbd |> 
  group_by(P,N) |> 
  dplyr::summarise(Mean = mean(Yield),
            SD = sd(Yield),
            n = length(Yield)) |> 
  dplyr::mutate(SE = SD/sqrt(n))

pn.means

## # A tibble: 15 × 6
## # Groups:   P [3]
##    P     N      Mean     SD     n     SE
##    <fct> <fct> <dbl>  <dbl> <int>  <dbl>
##  1 1     1     0.933 0.0577     3 0.0333
##  2 1     2     1.23  0.0577     3 0.0333
##  3 1     3     1.4   0.1        3 0.0577
##  4 1     4     1.93  0.153      3 0.0882
##  5 1     5     1.23  0.153      3 0.0882
##  6 2     1     0.833 0.0577     3 0.0333
##  7 2     2     0.967 0.115      3 0.0667
##  8 2     3     1.3   0.2        3 0.115 
##  9 2     4     1.33  0.252      3 0.145 
## 10 2     5     1.67  0.208      3 0.120 
## 11 3     1     0.867 0.153      3 0.0882
## 12 3     2     1.2   0.2        3 0.115 
## 13 3     3     1.37  0.0577     3 0.0333
## 14 3     4     1.43  0.0577     3 0.0333
## 15 3     5     1.2   0.1        3 0.0577

#Generate summary statistics for all treatment combinations
PN.means <- emmeans(fact_mod2, specs=c("P", "N"))
PN.means#Print the table of means

##  P N emmean     SE df lower.CL upper.CL
##  1 1  0.933 0.0809 28    0.768    1.099
##  2 1  0.833 0.0809 28    0.668    0.999
##  3 1  0.867 0.0809 28    0.701    1.032
##  1 2  1.233 0.0809 28    1.068    1.399
##  2 2  0.967 0.0809 28    0.801    1.132
##  3 2  1.200 0.0809 28    1.034    1.366
##  1 3  1.400 0.0809 28    1.234    1.566
##  2 3  1.300 0.0809 28    1.134    1.466
##  3 3  1.367 0.0809 28    1.201    1.532
##  1 4  1.933 0.0809 28    1.768    2.099
##  2 4  1.333 0.0809 28    1.168    1.499
##  3 4  1.433 0.0809 28    1.268    1.599
##  1 5  1.233 0.0809 28    1.068    1.399
##  2 5  1.667 0.0809 28    1.501    1.832
##  3 5  1.200 0.0809 28    1.034    1.366
## 
## Results are averaged over the levels of: Block 
## Confidence level used: 0.95

#Assign the letters to the treatment means
PN.means.cld <- cld(PN.means,
                   Letters=letters,
                   decreasing = TRUE)
PN.means.cld

##  P N emmean     SE df lower.CL upper.CL .group 
##  1 4  1.933 0.0809 28    1.768    2.099  a     
##  2 5  1.667 0.0809 28    1.501    1.832  ab    
##  3 4  1.433 0.0809 28    1.268    1.599   bc   
##  1 3  1.400 0.0809 28    1.234    1.566   bc   
##  3 3  1.367 0.0809 28    1.201    1.532   bcd  
##  2 4  1.333 0.0809 28    1.168    1.499   bcde 
##  2 3  1.300 0.0809 28    1.134    1.466   bcde 
##  1 5  1.233 0.0809 28    1.068    1.399    cdef
##  1 2  1.233 0.0809 28    1.068    1.399    cdef
##  3 5  1.200 0.0809 28    1.034    1.366    cdef
##  3 2  1.200 0.0809 28    1.034    1.366    cdef
##  2 2  0.967 0.0809 28    0.801    1.132     def
##  1 1  0.933 0.0809 28    0.768    1.099      ef
##  3 1  0.867 0.0809 28    0.701    1.032       f
##  2 1  0.833 0.0809 28    0.668    0.999       f
## 
## Results are averaged over the levels of: Block 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 15 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

PN.means.cld <- PN.means.cld |> 
  dplyr::arrange(P,N) |> 
  dplyr::mutate(SE1 = pn.means$SE)
PN.means.cld

##    P N    emmean        SE df  lower.CL  upper.CL  .group        SE1
## 1  1 1 0.9333333 0.0808683 28 0.7676821 1.0989845      ef 0.03333333
## 2  1 2 1.2333333 0.0808683 28 1.0676821 1.3989845    cdef 0.03333333
## 3  1 3 1.4000000 0.0808683 28 1.2343488 1.5656512   bc    0.05773503
## 4  1 4 1.9333333 0.0808683 28 1.7676821 2.0989845  a      0.08819171
## 5  1 5 1.2333333 0.0808683 28 1.0676821 1.3989845    cdef 0.08819171
## 6  2 1 0.8333333 0.0808683 28 0.6676821 0.9989845       f 0.03333333
## 7  2 2 0.9666667 0.0808683 28 0.8010155 1.1323179     def 0.06666667
## 8  2 3 1.3000000 0.0808683 28 1.1343488 1.4656512   bcde  0.11547005
## 9  2 4 1.3333333 0.0808683 28 1.1676821 1.4989845   bcde  0.14529663
## 10 2 5 1.6666667 0.0808683 28 1.5010155 1.8323179  ab     0.12018504
## 11 3 1 0.8666667 0.0808683 28 0.7010155 1.0323179       f 0.08819171
## 12 3 2 1.2000000 0.0808683 28 1.0343488 1.3656512    cdef 0.11547005
## 13 3 3 1.3666667 0.0808683 28 1.2010155 1.5323179   bcd   0.03333333
## 14 3 4 1.4333333 0.0808683 28 1.2676821 1.5989845   bc    0.03333333
## 15 3 5 1.2000000 0.0808683 28 1.0343488 1.3656512    cdef 0.05773503

#Comparing the levels of N for each level of P
ggplot(PN.means.cld, aes(x = P, 
                         y = emmean,
                         fill = N, 
                         label = .group)) + 
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = emmean - SE1, 
                    ymax = emmean + SE1),
                width = 0.3, 
                size = 0.7,
                position = position_dodge(0.9)) +
  geom_text(vjust=-4, 
            position = position_dodge(0.9),
            size = 3) +
  labs(x= "P", y= "Mean Yield") +
  lims(y=c(0,2.5))+
  scale_fill_brewer(palette = "greens") +
  theme_classic()

Analysis of Split-Plot Experiments in RCBD

An experiment was carried out to compare the effects of three concentrations of a chemical seed dressing on the yield of oats. Three varieties were used in the trial because it is suspected that the response to the seed treatment would depend on the variety used. A split design was laid out in five randomized blocks. Varieties were assigned at random to the main plots within each block, and the seed treatment and control were assigned at random to the subplots within each main plot.

Importing the data

spplot <- read_excel("splitplot.xlsx")
head(spplot)

## # A tibble: 6 × 4
##   Block Variety Concentration Yield
##   <dbl>   <dbl>         <dbl> <dbl>
## 1     1       1             1  42.9
## 2     1       1             4  44.4
## 3     1       1             3  49.5
## 4     1       1             2  53.8
## 5     1       2             3  59.8
## 6     1       2             1  53.3

ANOVA for split plot experiments

with(spplot, split2.rbd(factor1 = Variety,
                       factor2 = Concentration,
                       block = Block,
                       resp = Yield,
                       quali = c(TRUE, TRUE),
                       fac.names = c("Variety", "Concentration"),
                       mcomp = "tukey", 
                       unfold = "1"))

## ------------------------------------------------------------------------
## Legend:
## FACTOR 1 (plot):  Variety 
## FACTOR 2 (split-plot):  Concentration 
## ------------------------------------------------------------------------
## 
## ------------------------------------------------------------------------
## Analysis of Variance Table
## ------------------------------------------------------------------------
##                       DF     SS      MS     Fc  Pr(>Fc)    
## Variety                2 3631.5 1815.77 60.775  1.5e-05 ***
## Block                  4 2931.3  732.82 24.528 0.000152 ***
## Error a                8  239.0   29.88                    
## Concentration          3  350.2  116.73  8.316 0.000248 ***
## Variety*Concentration  6  368.3   61.38  4.372 0.002049 ** 
## Error b               36  505.4   14.04                    
## Total                 59 8025.7                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ------------------------------------------------------------------------
## CV 1 = 10.6146 %
## CV 2 = 7.275885 %
## 
## No significant interaction: analyzing the simple effects
## ------------------------------------------------------------------------
## Variety
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     3   60.255 
##  b    2   52.88 
##   c   1   41.35 
## ------------------------------------------------------------------------
## 
## Concentration
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     2   55.45333 
##  b    3   51.22 
##  b    4   50.29333 
##  b    1   49.01333 
## ------------------------------------------------------------------------
## 
## 
## 
## 
## Significant interaction: analyzing the interaction
## ------------------------------------------------------------------------
## 
## Analyzing  Variety  inside of each level of  Concentration 
## ------------------------------------------------------------------------
##                                  DF        SS        MS       Fc  p.value
## Variety : Concentration 1   2.00000 1573.2013 786.60067 43.70568 0.000000
## Variety : Concentration 2   2.00000  495.6253 247.81267 13.76915 0.000048
## Variety : Concentration 3   2.00000  422.6680 211.33400 11.74229 0.000148
## Variety : Concentration 4   2.00000 1508.3253 754.16267 41.90333 0.000000
## Pooled Error               32.22141  579.9106  17.99768       NA       NA
## ------------------------------------------------------------------------
## 
## 
##  Variety inside of Concentration 1
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     3   60.84 
##  b    2   50.34 
##   c   1   35.86 
## ------------------------------------------------------------------------
## 
##  Variety inside of Concentration 2
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     3   62.78 
##  b    2   54.84 
##  b    1   48.74 
## ------------------------------------------------------------------------
## 
##  Variety inside of Concentration 3
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     3   56.96 
## a     2   52.54 
##  b    1   44.16 
## ------------------------------------------------------------------------
## 
##  Variety inside of Concentration 4
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     3   60.44 
##  b    2   53.8 
##   c   1   36.64 
## ------------------------------------------------------------------------
## 
## 
## Analyzing  Concentration  inside of each level of  Variety 
## ------------------------------------------------------------------------
##                            DF       SS        MS        Fc  p.value
## Concentration : Variety 1   3 574.1620 191.38733 13.633626 0.000004
## Concentration : Variety 2   3  56.2760  18.75867  1.336288 0.277818
## Concentration : Variety 3   3  88.0455  29.34850  2.090663 0.118618
## Error b                    36 505.3640  14.03789        NA       NA
## ------------------------------------------------------------------------
## 
## 
##  Concentration inside of Variety 1
## ------------------------------------------------------------------------
## Tukey's test
## ------------------------------------------------------------------------
## Groups Treatments Means
## a     2   48.74 
## a     3   44.16 
##  b    4   36.64 
##  b    1   35.86 
## ------------------------------------------------------------------------
## ------------------------------------------------------------------------
## 
## 
##  Concentration inside of Variety 2
## ------------------------------------------------------------------------
## According to F test, the means of this factor are not different.
## ------------------------------------------------------------------------
##   Levels Means
## 1      1 50.34
## 2      2 54.84
## 3      3 52.54
## 4      4 53.80
## ------------------------------------------------------------------------
## 
##  Concentration inside of Variety 3
## ------------------------------------------------------------------------
## According to F test, the means of this factor are not different.
## ------------------------------------------------------------------------
##   Levels Means
## 1      1 60.84
## 2      2 62.78
## 3      3 56.96
## 4      4 60.44
## ------------------------------------------------------------------------

Another way to generate ANOVA for split plot experiments

#Converts integer codes to nominal codes
spplot <- spplot |> 
  dplyr::mutate(Blk = as.factor(Block),
         Var = as.factor(Variety),
         Conc = as.factor(Concentration))

#Generates ANOVA
spplot.out <- aov(Yield ~ Blk + Var + Blk:Var + Conc + Var:Conc,
                  data = spplot)
anova(spplot.out)

## Analysis of Variance Table
## 
## Response: Yield
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## Blk        4 2931.3  732.82  52.2028 1.691e-14 ***
## Var        2 3631.5 1815.77 129.3477 < 2.2e-16 ***
## Conc       3  350.2  116.73   8.3156 0.0002483 ***
## Blk:Var    8  239.0   29.88   2.1283 0.0583461 .  
## Var:Conc   6  368.3   61.38   4.3725 0.0020493 ** 
## Residuals 36  505.4   14.04                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We need to correct the F ratio and p-value for Blk and Var

#Extract the Mean Square for *Blk:Var* and use this as *MSE(a)*
MSE_a <- anova(spplot.out)["Mean Sq"][4,1]
MS_Blk <- anova(spplot.out)["Mean Sq"][1,1]
MS_Var <- anova(spplot.out)["Mean Sq"][2,1]
print(c(MSE_a,MS_Blk, MS_Var))

## [1]   29.87704  732.81692 1815.76850

The corrected F ratio and p-value for Blk is:

\[ \begin{aligned} F_{Block} &= \frac{MS_{Block}}{MSE(a)} \notag \\ &= 24.528\notag \\ p-value &= 1.5179\times 10^{-4} \end{aligned} \]

The corrected F ratio and p-value for Var is:

\[ \begin{aligned} F_{Var} &= \frac{MS_{Var}}{MSE(a)} \notag \\ &= 60.775\notag \\ p-value &= 5.02\times 10^{-6} \end{aligned} \]

Generate the visualization of the interaction effect

VC.means <- spplot |> 
  group_by(Var, Conc) |> 
  dplyr::summarize(Mean = mean(Yield),
            SD = sd(Yield),
            n = length(Yield)) |> 
  dplyr::mutate(SE = SD/sqrt(n))
VC.means

## # A tibble: 12 × 6
## # Groups:   Var [3]
##    Var   Conc   Mean    SD     n    SE
##    <fct> <fct> <dbl> <dbl> <int> <dbl>
##  1 1     1      35.9  6.27     5  2.80
##  2 1     2      48.7  7.20     5  3.22
##  3 1     3      44.2  7.10     5  3.18
##  4 1     4      36.6  6.46     5  2.89
##  5 2     1      50.3 12.7      5  5.66
##  6 2     2      54.8  9.92     5  4.44
##  7 2     3      52.5 10.1      5  4.52
##  8 2     4      53.8  7.45     5  3.33
##  9 3     1      60.8  9.57     5  4.28
## 10 3     2      62.8  5.67     5  2.54
## 11 3     3      57.0  9.78     5  4.37
## 12 3     4      60.4 10.0      5  4.47

#Generate summary statistics for all treatment combinations
VC.means1 <- emmeans(spplot.out, specs=c("Var", "Conc"))
VC.means1#Print the table of means

##  Var Conc emmean   SE df lower.CL upper.CL
##  1   1      35.9 1.68 36     32.5     39.3
##  2   1      50.3 1.68 36     46.9     53.7
##  3   1      60.8 1.68 36     57.4     64.2
##  1   2      48.7 1.68 36     45.3     52.1
##  2   2      54.8 1.68 36     51.4     58.2
##  3   2      62.8 1.68 36     59.4     66.2
##  1   3      44.2 1.68 36     40.8     47.6
##  2   3      52.5 1.68 36     49.1     55.9
##  3   3      57.0 1.68 36     53.6     60.4
##  1   4      36.6 1.68 36     33.2     40.0
##  2   4      53.8 1.68 36     50.4     57.2
##  3   4      60.4 1.68 36     57.0     63.8
## 
## Results are averaged over the levels of: Blk 
## Confidence level used: 0.95

#Assign the letters to the treatment means
VC.means.cld <- cld(VC.means1,
                   Letters=letters,
                   decreasing = TRUE) |> 
  dplyr::arrange(Var, Conc) |> 
  dplyr::mutate(SE1 = VC.means$SE)

VC.means.cld#Print the table of means

##    Var Conc emmean       SE df lower.CL upper.CL   .group      SE1
## 1    1    1  35.86 1.675583 36 32.46176 39.25824        g 2.802963
## 2    1    2  48.74 1.675583 36 45.34176 52.13824     de   3.217856
## 3    1    3  44.16 1.675583 36 40.76176 47.55824      ef  3.176256
## 4    1    4  36.64 1.675583 36 33.24176 40.03824       fg 2.890778
## 5    2    1  50.34 1.675583 36 46.94176 53.73824     de   5.659205
## 6    2    2  54.84 1.675583 36 51.44176 58.23824  abcd    4.435380
## 7    2    3  52.54 1.675583 36 49.14176 55.93824    cd    4.520133
## 8    2    4  53.80 1.675583 36 50.40176 57.19824   bcd    3.331516
## 9    3    1  60.84 1.675583 36 57.44176 64.23824  ab      4.280958
## 10   3    2  62.78 1.675583 36 59.38176 66.17824  a       2.537597
## 11   3    3  56.96 1.675583 36 53.56176 60.35824  abcd    4.373168
## 12   3    4  60.44 1.675583 36 57.04176 63.83824  abc     4.472762

#Comparing the levels of Concentration for each level of Variety
ggplot(VC.means.cld, aes(x = Var, 
                         y = emmean,
                         fill = Conc, 
                         label = .group)) + 
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = emmean - SE1, 
                    ymax = emmean + SE1),
                width = 0.2, 
                size = 0.7,
                position = position_dodge(0.9)) +
  geom_text(vjust=-3.2, 
            position = position_dodge(0.9),
            size = 2.5) +
  labs(x= "Variety", y= "Mean Yield") +
  lims(y = c(0, 75)) +
  scale_fill_brewer(palette = "Greens") +
  theme_classic()

#Comparing the levels of Variety for each level of Concentration
ggplot(VC.means.cld, aes(x = Conc, 
                         y = emmean,
                         fill = Var, 
                         label = .group)) + 
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = emmean - SE1, 
                    ymax = emmean + SE1),
                width = 0.2, 
                size = 0.7,
                position = position_dodge(0.9)) +
  geom_text(vjust=-3.5, 
            position = position_dodge(0.9),
            size = 2.5) +
  labs(x= "Concentration", y= "Mean Yield") +
  lims(y = c(0, 75)) +
  scale_fill_brewer(palette = "Greens") +
  theme_classic()

Analysis of Experiments with Repeated Measurements

We shall use the agrondata in this demonstration since it contains repeated measurement on plant height. The data on plant height is in wide format, hence, it has to be converted to long format

head(agrondata)

## # A tibble: 6 × 12
##   treatment block   DTE  HT15  HT30  HT45  HT60   DTF   DTM HERBAGE Trt   Blk  
##       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <fct> <fct>
## 1         1     1     4  56.2 102.   152.  210.    44    60  158000 1     1    
## 2         1     2     4  53.9  92.9  140   194.    44    60  192000 1     2    
## 3         1     3     4  53.4  90.4  134.  189.    44    60  180000 1     3    
## 4         1     4     4  52.2  89    129.  183.    44    60  155000 1     4    
## 5         1     5     4  54.6 102.   150.  207.    44    60  185000 1     5    
## 6         2     1     7  47.3  73.5  131.  173.    76    90  255000 2     1

#Subset the agrondata to include only height measurements
HT <- agrondata |> 
  select(treatment, block, HT15, HT30, HT45, HT60)
HT

## # A tibble: 15 × 6
##    treatment block  HT15  HT30  HT45  HT60
##        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1         1     1  56.2 102.  152.  210. 
##  2         1     2  53.9  92.9 140   194. 
##  3         1     3  53.4  90.4 134.  189. 
##  4         1     4  52.2  89   129.  183. 
##  5         1     5  54.6 102.  150.  207. 
##  6         2     1  47.3  73.5 131.  173. 
##  7         2     2  44.4  72.4 119.  159. 
##  8         2     3  36.6  70.1 118.  158. 
##  9         2     4  38.2  68.8 119.  148. 
## 10         2     5  46.5  74.8 133.  174. 
## 11         3     1  24.4  38.7  50.4  74.2
## 12         3     2  21.9  36.6  45.9  69.5
## 13         3     3  22.2  37.3  46.7  70  
## 14         3     4  22.3  36    45.6  71.8
## 15         3     5  25.7  39.1  51.4  77.4

HT_Long <- HT  |> 
  gather(key = "Time", value = "HT", HT15, HT30, HT45, HT60) |> 
  convert_as_factor(treatment, block, Time) |> 
  dplyr::rename(Treatment = treatment) |> 
  dplyr::mutate(Days = recode(Time,
                       "HT15" = "15",
                       "HT30" = "30",
                       "HT45" = "45",
                       "HT60" = "60"))

HT_Long

## # A tibble: 60 × 5
##    Treatment block Time     HT Days 
##    <fct>     <fct> <fct> <dbl> <fct>
##  1 1         1     HT15   56.2 15   
##  2 1         2     HT15   53.9 15   
##  3 1         3     HT15   53.4 15   
##  4 1         4     HT15   52.2 15   
##  5 1         5     HT15   54.6 15   
##  6 2         1     HT15   47.3 15   
##  7 2         2     HT15   44.4 15   
##  8 2         3     HT15   36.6 15   
##  9 2         4     HT15   38.2 15   
## 10 2         5     HT15   46.5 15   
## # ℹ 50 more rows

#Testing for presence of outliers
HT_Long |> 
  group_by(Treatment, Days) |> 
  identify_outliers(HT)

## [1] Treatment  Days       block      Time       HT         is.outlier is.extreme
## <0 rows> (or 0-length row.names)

HT_Long |> 
  group_by(Treatment, Days) |> 
  shapiro_test(HT)

## # A tibble: 12 × 5
##    Treatment Days  variable statistic      p
##    <fct>     <fct> <chr>        <dbl>  <dbl>
##  1 1         15    HT           0.989 0.975 
##  2 1         30    HT           0.837 0.157 
##  3 1         45    HT           0.930 0.595 
##  4 1         60    HT           0.911 0.476 
##  5 2         15    HT           0.866 0.252 
##  6 2         30    HT           0.961 0.813 
##  7 2         45    HT           0.762 0.0384
##  8 2         60    HT           0.912 0.478 
##  9 3         15    HT           0.839 0.161 
## 10 3         30    HT           0.929 0.592 
## 11 3         45    HT           0.837 0.156 
## 12 3         60    HT           0.921 0.536

rep.aov <- anova_test(
  data = HT_Long, 
  dv = HT,
  wid = block,
  within = c(Treatment, Days))

#To display the ANOVA table
get_anova_table(rep.aov)

## ANOVA Table (type III tests)
## 
##           Effect  DFn   DFd        F        p p<.05   ges
## 1      Treatment 1.04  4.15  915.983 4.91e-06     * 0.969
## 2           Days 1.02  4.09 1289.421 2.80e-06     * 0.979
## 3 Treatment:Days 6.00 24.00  297.420 2.65e-21     * 0.877

#To display the results of sphericity test
rep.aov$`Mauchly's Test for Sphericity`

##      Effect        W     p p<.05
## 1 Treatment 0.071000 0.019     *
## 2      Days 0.000758 0.003     *

If the assumption of sphericity is meet, we can analyze data from a repeated measures experiment as a split plot experiment in CRD.

head(HT_Long)

## # A tibble: 6 × 5
##   Treatment block Time     HT Days 
##   <fct>     <fct> <fct> <dbl> <fct>
## 1 1         1     HT15   56.2 15   
## 2 1         2     HT15   53.9 15   
## 3 1         3     HT15   53.4 15   
## 4 1         4     HT15   52.2 15   
## 5 1         5     HT15   54.6 15   
## 6 2         1     HT15   47.3 15

aov.rep <- aov(HT ~ Treatment + Treatment:block + Days + Days:Treatment,
               data = HT_Long)

anova(aov.rep)

## Analysis of Variance Table
## 
## Response: HT
##                 Df Sum Sq Mean Sq   F value    Pr(>F)    
## Treatment        2  61993 30996.3 2347.9997 < 2.2e-16 ***
## Days             3  91141 30380.3 2301.3391 < 2.2e-16 ***
## Treatment:block 12   1524   127.0    9.6226 5.516e-08 ***
## Treatment:Days   6  14314  2385.6  180.7149 < 2.2e-16 ***
## Residuals       36    475    13.2                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We need to correct the F ratio and p-value for Treatment

#Extract the Mean Square for Blk:Var and use this as MSE(a)
MSE_a <- anova(aov.rep)["Mean Sq"][3,1]
MS_Trt <- anova(aov.rep)["Mean Sq"][1,1]
print(c(MSE_a, MS_Trt))

## [1]   127.0289 30996.2702

The corrected F ratio and p-value for Treatment is:

\[ \begin{aligned} F_{Treatment} &= \frac{MS_{Treatment}}{MSE(a)} \notag \\ &= 244.01\notag \\ p-value &= 2\times 10^{-8} \end{aligned} \]

Visualization

#Generate summary statistics for all treatment combinations
DT.means <- emmeans(aov.rep, specs=c("Days", "Treatment"))
DT.means#Print the table of means

##  Days Treatment emmean   SE df lower.CL upper.CL
##  15   1           54.1 1.62 36     50.8     57.4
##  30   1           95.2 1.62 36     91.9     98.5
##  45   1          141.1 1.62 36    137.8    144.4
##  60   1          196.3 1.62 36    193.0    199.6
##  15   2           42.6 1.62 36     39.3     45.9
##  30   2           71.9 1.62 36     68.6     75.2
##  45   2          123.9 1.62 36    120.6    127.2
##  60   2          162.6 1.62 36    159.3    165.9
##  15   3           23.3 1.62 36     20.0     26.6
##  30   3           37.5 1.62 36     34.2     40.8
##  45   3           48.0 1.62 36     44.7     51.3
##  60   3           72.6 1.62 36     69.3     75.9
## 
## Results are averaged over the levels of: block 
## Confidence level used: 0.95

#Assign the letters to the treatment means
DT.means.cld <- cld(DT.means,
                   Letters=letters,
                   decreasing = TRUE) |> 
  dplyr::arrange(Treatment, Days)

DT.means.cld#Print the table of means

##  Days Treatment emmean   SE df lower.CL upper.CL .group     
##  15   1           54.1 1.62 36     50.8     57.4        g   
##  30   1           95.2 1.62 36     91.9     98.5      e     
##  45   1          141.1 1.62 36    137.8    144.4    c       
##  60   1          196.3 1.62 36    193.0    199.6  a         
##  15   2           42.6 1.62 36     39.3     45.9         hi 
##  30   2           71.9 1.62 36     68.6     75.2       f    
##  45   2          123.9 1.62 36    120.6    127.2     d      
##  60   2          162.6 1.62 36    159.3    165.9   b        
##  15   3           23.3 1.62 36     20.0     26.6           j
##  30   3           37.5 1.62 36     34.2     40.8          i 
##  45   3           48.0 1.62 36     44.7     51.3        gh  
##  60   3           72.6 1.62 36     69.3     75.9       f    
## 
## Results are averaged over the levels of: block 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 12 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.

HT_Long |> 
  group_by(Days, Treatment) |> 
  dplyr::summarize(Mean = mean(HT),
            SD = sd(HT),
            n = length(HT)) |>
  dplyr::mutate(SE = SD/sqrt(n)) |> 
  ggplot(aes(x = Days, y = Mean, group=Treatment)) +
  geom_line(aes(color = Treatment)) +
  geom_point() +
  geom_errorbar(aes(ymin = Mean - SE, 
                    ymax = Mean + SE,
                    colour = Treatment),
                width = 0.2, 
                size = 1) +
  labs(y = "Mean Height") +
  theme_classic() +
  theme(legend.position="bottom")

Correlation analysis

Loading a data set into R

plant_data <- read_excel("ACIAR Data2.xlsx", sheet = "Plant")
head(plant_data)

## # A tibble: 6 × 7
##   Treatment Replication Variety   ODW TotalN TotalP TotalK
##       <dbl>       <dbl>   <dbl> <dbl>  <dbl>  <dbl>  <dbl>
## 1         1           1       1  47.1   2.74   5.16   2.84
## 2         1           2       1  15.6   2.26   4.09   3.00
## 3         1           3       1  42.1   2.41   3.74   2.81
## 4         1           4       1  27.7   2.56   5.22   2.97
## 5         2           1       1  62.8   2.39   4.19   2.66
## 6         2           2       1  60.3   2.56   4.51   3.04

Correlation coefficient

#Correlation between two variables
plant_data |> 
  select(ODW, TotalN) |> 
  cor(method = "spearman")

##              ODW    TotalN
## ODW    1.0000000 0.4299631
## TotalN 0.4299631 1.0000000

#Correlation between two variables
cor.test(plant_data$ODW, plant_data$TotalN,
         method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  plant_data$ODW and plant_data$TotalN
## S = 16679, p-value = 0.0009422
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.4299631

#Test of significance of the correlation
plant_data |> 
  select(ODW, TotalN) |> 
  cor_test(method = "pearson")

## # A tibble: 1 × 8
##   var1  var2     cor statistic      p conf.low conf.high method 
##   <chr> <chr>  <dbl>     <dbl>  <dbl>    <dbl>     <dbl> <chr>  
## 1 ODW   TotalN  0.33      2.56 0.0134   0.0719     0.544 Pearson

#Correlation between several variables
plant_data |> 
  select(-c(Treatment, Variety, Replication)) |> 
  cor()

##               ODW     TotalN     TotalP     TotalK
## ODW     1.0000000  0.3285493 -0.5482409  0.4746828
## TotalN  0.3285493  1.0000000 -0.6361370  0.7781900
## TotalP -0.5482409 -0.6361370  1.0000000 -0.7443045
## TotalK  0.4746828  0.7781900 -0.7443045  1.0000000

#Correlation between several variables
plant_data |> 
  select(-c(Treatment, Variety, Replication)) |> 
  cor_test()

## # A tibble: 16 × 8
##    var1   var2     cor    statistic        p conf.low conf.high method 
##    <chr>  <chr>  <dbl>        <dbl>    <dbl>    <dbl>     <dbl> <chr>  
##  1 ODW    ODW     1    493147422.   0          1          1     Pearson
##  2 ODW    TotalN  0.33         2.56 1.34e- 2   0.0719     0.544 Pearson
##  3 ODW    TotalP -0.55        -4.82 1.22e- 5  -0.709     -0.333 Pearson
##  4 ODW    TotalK  0.47         3.96 2.19e- 4   0.242      0.656 Pearson
##  5 TotalN ODW     0.33         2.56 1.34e- 2   0.0719     0.544 Pearson
##  6 TotalN TotalN  1          Inf    0          1          1     Pearson
##  7 TotalN TotalP -0.64        -6.06 1.37e- 7  -0.770     -0.448 Pearson
##  8 TotalN TotalK  0.78         9.11 1.69e-12   0.648      0.864 Pearson
##  9 TotalP ODW    -0.55        -4.82 1.22e- 5  -0.709     -0.333 Pearson
## 10 TotalP TotalN -0.64        -6.06 1.37e- 7  -0.770     -0.448 Pearson
## 11 TotalP TotalP  1    348707886.   0          1          1     Pearson
## 12 TotalP TotalK -0.74        -8.19 4.88e-11  -0.842     -0.599 Pearson
## 13 TotalK ODW     0.47         3.96 2.19e- 4   0.242      0.656 Pearson
## 14 TotalK TotalN  0.78         9.11 1.69e-12   0.648      0.864 Pearson
## 15 TotalK TotalP -0.74        -8.19 4.88e-11  -0.842     -0.599 Pearson
## 16 TotalK TotalK  1    348707886.   0          1          1     Pearson

#Correlation between several variables
plant_data |> 
  select(-c(Treatment, Variety, Replication)) |> 
  correlation::correlation(include_factors = TRUE, method = "Pearson")

## # Correlation Matrix (Pearson-method)
## 
## Parameter1 | Parameter2 |     r |         95% CI | t(54) |         p
## --------------------------------------------------------------------
## ODW        |     TotalN |  0.33 | [ 0.07,  0.54] |  2.56 | 0.013*   
## ODW        |     TotalP | -0.55 | [-0.71, -0.33] | -4.82 | < .001***
## ODW        |     TotalK |  0.47 | [ 0.24,  0.66] |  3.96 | < .001***
## TotalN     |     TotalP | -0.64 | [-0.77, -0.45] | -6.06 | < .001***
## TotalN     |     TotalK |  0.78 | [ 0.65,  0.86] |  9.11 | < .001***
## TotalP     |     TotalK | -0.74 | [-0.84, -0.60] | -8.19 | < .001***
## 
## p-value adjustment method: Holm (1979)
## Observations: 56

Visualization

Scatterplot

#from the ggstatplot package
ggscatterstats(
  data = plant_data,
  x = ODW,
  y = TotalN,
  bf.message = FALSE,
  marginal = FALSE # remove histograms
) +
  theme_classic()

plant_data |> 
  select(-c(Treatment, Variety, Replication)) |> 
  ggpairs() #from the ggally package

Correlogram

plant_data |> 
  select(-c(Treatment, Variety, Replication)) |> 
  ggcorr() #from the ggally package

plant_data |> 
  mutate(TRT = as.factor(Treatment),
         VAR = as.factor(Variety)) |>
  ggpairs(columns = 4:7, ggplot2::aes(colour=VAR))

Analysis of RCBD Experiment with Subsamples

An agronomist wishes to determine the efficacy of two fumigants C and S for controlling wire worms. A field is divided into 5 blocks, each containing 3 plots. Each of the three treatments fumigant C, fumigant S, and the control O were randomly assigned to one plot within each block. In each experimental plot, the number of wire worms was counted in each of four sub-samples.

worm <- read_excel("rcbdsub.xlsx", sheet = "rcbdsub")
head(worm)

## # A tibble: 6 × 4
##   TRT     BLOCK SAMPLES Worms
##   <chr>   <dbl>   <dbl> <dbl>
## 1 Control     1       1    12
## 2 Control     1       2    20
## 3 Control     1       3     8
## 4 Control     1       4     8
## 5 F1          1       1     5
## 6 F1          1       2     4

worm$Block <- as.factor(worm$BLOCK)
aov.worm <- aov(Worms ~ TRT + Block + TRT:Block,
            data = worm)
anova(aov.worm)

## Analysis of Variance Table
## 
## Response: Worms
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## TRT        2 293.43 146.717 16.1129 5.28e-06 ***
## Block      4 151.17  37.792  4.1504 0.006033 ** 
## TRT:Block  8 196.23  24.529  2.6939 0.016407 *  
## Residuals 45 409.75   9.106                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

MS(Residuals) serves as MSE while MS(TRT:Block) serves as the MS(SE).
If TRT:Block is significant, then we use it to correct the F values for TRT and Block

\[ F_{TRT} = \frac{146.717}{24.529} = 5.981369 \\ p-value = 0.02579167 \]

\[ F_{Block} = \frac{37.792}{24.529} = 1.540707 \\ p-value = 0.2789973 \]

Post hoc test using HSD.test() function from the agricolae package

df1 <- anova(aov.worm)["Df"][3,1]#Extracts the DF for MSE 
MSerror1 <- anova(aov.worm)["Mean Sq"][3,1]#Extracts the MSE
with(worm, 
     HSD.test(y = Worms,
              trt = TRT,
              DFerror = df1,
              MSerror = MSerror1,
              console = TRUE))

## 
## Study: Worms ~ TRT
## 
## HSD Test for Worms 
## 
## Mean Square Error:  24.52917 
## 
## TRT,  means
## 
##         Worms      std  r       se Min Max  Q25 Q50   Q75
## Control  9.70 5.068998 20 1.107456   4  22 6.75 8.0 12.00
## F1       5.25 2.935715 20 1.107456   0  12 3.00 4.5  7.25
## F2       4.80 2.353050 20 1.107456   1   9 3.00 4.5  6.25
## 
## Alpha: 0.05 ; DF Error: 8 
## Critical Value of Studentized Range: 4.041036 
## 
## Minimun Significant Difference: 4.475269 
## 
## Treatments with the same letter are not significantly different.
## 
##         Worms groups
## Control  9.70      a
## F1       5.25     ab
## F2       4.80      b

Visualizing the results can follow the same procedure as in RCBD without subsamples

Statistical Tools and Methods for Data Analysis

NE Milla, Jr.

2024-10-10

Loading libraries

Analysis of Experiments in RCBD

Importing the data (Make sure to set the appropriate working directory)

ANOVA for RCBD experiment with post hoc

Presenting the results visually

Analysis of Factorial Experiments in CRD

Importing the data

ANOVA Table

Graphical presentation

Analysis of Factorial Experiments in RCBD

Importing the data into R

Factorial ANOVA (RCBD)

Visualizing the results

Analysis of Split-Plot Experiments in RCBD

Importing the data

ANOVA for split plot experiments

Another way to generate ANOVA for split plot experiments

Generate the visualization of the interaction effect

Analysis of Experiments with Repeated Measurements

Visualization

Correlation analysis

Loading a data set into R

Correlation coefficient

Visualization

Scatterplot

Correlogram

Analysis of RCBD Experiment with Subsamples

Post hoc test using HSD.test() function from the agricolae package