1. Draw a plot to show the experiment design in the field

Varieties: A, B, C, D

replicates: 5

arrangement: random

x=rep(1:5,4)
y=rep(1:4,5)
z=sample(rep(c("A", "B", "C","D"), 5))
z
##  [1] "A" "C" "C" "B" "B" "D" "D" "B" "B" "A" "C" "A" "C" "C" "D" "D" "A" "A" "D"
## [20] "B"
design = data.frame(x=factor(x), y=factor(y), z = z)
design
##    x y z
## 1  1 1 A
## 2  2 2 C
## 3  3 3 C
## 4  4 4 B
## 5  5 1 B
## 6  1 2 D
## 7  2 3 D
## 8  3 4 B
## 9  4 1 B
## 10 5 2 A
## 11 1 3 C
## 12 2 4 A
## 13 3 1 C
## 14 4 2 C
## 15 5 3 D
## 16 1 4 D
## 17 2 1 A
## 18 3 2 A
## 19 4 3 D
## 20 5 4 B
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
ggplot(data=design,
       aes(x=x,y=y))+
  geom_tile(fill = y,
            alpha = 0.2,
            col = "blue")+
  geom_text(aes(label = z))+
  theme_void()
The design of experiment

The design of experiment

2. Randomly generate the yield data following normal distribution

for A (mean = 50, sd = 3), B (mean = 45, sd =2),

C (mean = 40, sd = 5), and D (mean = 52, sd = 2).

yield = seq(30, 60,0.5)
y_A = dnorm(yield, mean = 50, sd = 3)
y_B = dnorm (yield,mean = 45, sd =2)
y_C = dnorm (yield,mean = 40, sd =5)
y_D = dnorm (yield,mean = 52, sd =2)

mydata = data.frame(yield,y_A,y_B,y_C, y_D)
ggplot(data = mydata,
       aes(x=yield, y = y_A))+
  geom_line(col = "green")+
  geom_vline(xintercept = 50,col = "green")+
  geom_line(aes(y=y_B),
            col="red")+
  geom_vline(xintercept=45, col = "red")+
  geom_line(aes(y=y_C),
            col="blue")+
  geom_vline(xintercept=40, col = "blue")+
  geom_line(aes(y=y_D),
            col="purple")+
  geom_vline(xintercept=52, col = "purple")

Yield_A = rnorm(5, mean = 50, sd =3)
Yield_B = rnorm(5, mean = 45, sd = 2)
Yield_C = rnorm(5, mean = 40, sd =5)
Yield_D = rnorm(5, mean = 52, sd = 2)

Yield = c(Yield_A, Yield_B,Yield_C, Yield_D)
Yield
##  [1] 50.27897 46.90059 48.83098 51.68239 47.10762 44.99582 48.88921 45.49101
##  [9] 45.66048 40.57440 33.84113 37.03294 43.26790 33.89243 46.57814 50.83271
## [17] 48.88642 49.35690 48.53048 54.94510
Variety = rep(c("A", "B", "C", "D"), each = 5)
Variety
##  [1] "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D" "D" "D"
## [20] "D"
Yield_data = data.frame(Yield,Variety)
Yield_data
##       Yield Variety
## 1  50.27897       A
## 2  46.90059       A
## 3  48.83098       A
## 4  51.68239       A
## 5  47.10762       A
## 6  44.99582       B
## 7  48.88921       B
## 8  45.49101       B
## 9  45.66048       B
## 10 40.57440       B
## 11 33.84113       C
## 12 37.03294       C
## 13 43.26790       C
## 14 33.89243       C
## 15 46.57814       C
## 16 50.83271       D
## 17 48.88642       D
## 18 49.35690       D
## 19 48.53048       D
## 20 54.94510       D
summary(Yield_data)
##      Yield         Variety         
##  Min.   :33.84   Length:20         
##  1st Qu.:44.56   Class :character  
##  Median :47.00   Mode  :character  
##  Mean   :45.88                     
##  3rd Qu.:49.01                     
##  Max.   :54.95
mean = with(Yield_data, tapply(Yield, Variety,mean))
mean
##        A        B        C        D 
## 48.96011 45.12218 38.92251 50.51032
sd = with(Yield_data, tapply(Yield, Variety,sd))
sd
##        A        B        C        D 
## 2.051841 2.971303 5.748556 2.629563

3. Test whether there are significant differences

on yield among the 4 varieties and draw a plot to

show the results as cool as you can

res_aov = aov(Yield~Variety, data = Yield_data)
summary(res_aov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Variety      3  399.5  133.18   10.05 0.000579 ***
## Residuals   16  212.0   13.25                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res_aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Yield ~ Variety, data = Yield_data)
## 
## $Variety
##           diff        lwr        upr     p adj
## B-A  -3.837925 -10.424437  2.7485866 0.3718855
## C-A -10.037601 -16.624113 -3.4510891 0.0024633
## D-A   1.550212  -5.036300  8.1367242 0.9056230
## C-B  -6.199676 -12.786188  0.3868362 0.0687462
## D-B   5.388138  -1.198374 11.9746495 0.1301282
## D-C  11.587813   5.001301 18.1743252 0.0006380
plot(TukeyHSD(res_aov))

ggplot(Yield_data,
       aes(x=Variety,y=Yield), show.legend = T)+
  geom_boxplot(aes(fill = Variety))+
  scale_fill_manual (values = cm.colors(4))+
  labs (title = "The differences among varieties") +
  theme(plot.title = element_text(face = "italic", color = "red", size=14))+
  xlab ("Varieties")+
  ylab ("Yield (Ton)")+
  theme(axis.title = element_text(face = "bold", color = "blue", size=13))+
  scale_x_discrete(labels = c("Type A", "Type B", "Type C", "Type D")) +
  theme(axis.text = element_text(face = "bold",
                                 color = "Black", size = 12,
                                 hjust = 1))+
  geom_segment(x=3,
               y=58,
               xend = 3,
               yend = 59)+
  geom_segment(x=4,
               y=59,
               xend = 4,
               yend = 60)+
  geom_segment(x=3,
               y=60,
               xend = 4,
               yend = 60)+
  annotate(geom = "text",
           x=3.5,
           y=60,
           label = "*")+
  annotate(geom = "text",
           x=2,
           y=57,
           label = "P=5.1e-07")+
  ylim(35,60.2)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).