R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Installing necessary packages

packages <- c(“ggplot2”, “dplyr”, “ggbeeswarm”, “viridisLite”, “viridis”, “RColorBrewer”, “cowplot”, “magick”, “ggpubr”, “readxl”, “knitr”)

new_packages <- packages[!(packages %in% installed.packages()[,“Package”])] if(length(new_packages)) install.packages(new_packages)

Loading libraries

library(readxl) library(ggplot2) library(dplyr) library(ggbeeswarm) library(viridisLite) library(viridis) library(RColorBrewer) library(ggpubr) library(cowplot) library(magick) library (knitr)

Directory setup, Loading and inspecting data

library(readxl)
library(knitr)
#.Rmd document should be enclosed by three backwards apostrophes ``` (grave accents!). 
# These are known as code chunks
# Set working directory to your data folder 
# setwd('/path/to/your/data/folder')  
setwd ('C:/Users/huma0/Desktop/Data')
# Load all the data files
test <- read_excel("test.xlsx")
## New names:
## * `` -> `...1`
kable(test, digits = 2, caption = "Data from test.xlsx")
Data from test.xlsx
…1 Treatment Yield_bu_per_acre Severity
1 Control 52.0 7.0
2 Control 53.2 8.0
3 Control 51.8 7.0
4 Control 50.0 7.0
5 Control 51.0 9.0
6 Control 52.7 8.5
7 Fungicide_A 52.0 5.1
8 Fungicide_A 55.0 5.2
9 Fungicide_A 56.6 5.0
10 Fungicide_A 56.2 5.0
11 Fungicide_A 53.1 5.2
12 Fungicide_A 52.4 5.1
13 Fungicide_B 59.2 3.5
14 Fungicide_B 55.0 3.9
15 Fungicide_B 59.1 3.4
16 Fungicide_B 53.2 4.0
17 Fungicide_B 52.6 5.1
18 Fungicide_B 58.0 3.7
19 Fungicide_C 61.0 3.1
20 Fungicide_C 60.0 3.2
21 Fungicide_C 58.0 3.4
22 Fungicide_C 59.0 3.4
23 Fungicide_C 57.9 3.5
24 Fungicide_C 59.0 3.3
test2 <- read_excel("test2.xlsx")
## New names:
## * `` -> `...1`
kable(test2, digits = 2, caption = "Data from test2.xlsx")
Data from test2.xlsx
…1 Treatment Yield_bu_per_acre Severity
1 Control 47.0 8.0
2 Control 48.0 9.0
3 Control 47.4 8.5
4 Control 47.4 8.0
5 Control 49.0 9.0
6 Control 50.0 7.0
7 Fungicide_A 52.4 4.6
8 Fungicide_A 52.6 4.8
9 Fungicide_A 53.0 4.9
10 Fungicide_A 53.0 5.0
11 Fungicide_A 53.1 4.9
12 Fungicide_A 54.0 4.6
13 Fungicide_B 55.0 3.5
14 Fungicide_B 55.3 3.6
15 Fungicide_B 55.4 3.4
16 Fungicide_B 53.2 3.8
17 Fungicide_B 53.3 3.9
18 Fungicide_B 55.0 3.7
19 Fungicide_C 63.0 2.0
20 Fungicide_C 64.0 2.0
21 Fungicide_C 66.6 2.3
22 Fungicide_C 67.0 2.5
23 Fungicide_C 68.0 2.7
24 Fungicide_C 65.0 2.3
test3 <- read_excel("test3.xlsx")
## New names:
## * `` -> `...1`
kable(test3, digits = 2, caption = "Data from test3.xlsx")
Data from test3.xlsx
…1 Treatment Yield_bu_per_acre Severity
1 Control 47.0 8.0
2 Control 48.0 9.0
3 Control 47.4 8.5
4 Control 47.4 8.0
5 Control 49.0 9.0
6 Control 50.0 7.0
7 Fungicide_A 47.0 4.6
8 Fungicide_A 48.0 4.8
9 Fungicide_A 47.4 4.9
10 Fungicide_A 47.4 5.0
11 Fungicide_A 49.0 4.9
12 Fungicide_A 50.0 4.6
13 Fungicide_B 47.0 3.5
14 Fungicide_B 48.0 3.6
15 Fungicide_B 47.4 3.4
16 Fungicide_B 47.4 3.8
17 Fungicide_B 49.0 3.9
18 Fungicide_B 50.0 3.7
19 Fungicide_C 55.0 2.0
20 Fungicide_C 55.3 2.0
21 Fungicide_C 55.4 2.3
22 Fungicide_C 53.2 2.5
23 Fungicide_C 53.3 2.7
24 Fungicide_C 55.0 2.3
test4 <- read_excel("test4.xlsx")
## New names:
## * `` -> `...1`
kable(test4, digits = 2, caption = "Data from test4.xlsx")
Data from test4.xlsx
…1 Treatment Yield_bu_per_acre Severity
1 Control 47.0 8.0
2 Control 48.0 9.0
3 Control 47.4 8.5
4 Control 47.4 8.0
5 Control 49.0 9.0
6 Control 50.0 7.0
7 Fungicide_A 47.0 4.6
8 Fungicide_A 48.0 4.8
9 Fungicide_A 47.4 4.9
10 Fungicide_A 47.4 5.0
11 Fungicide_A 49.0 4.9
12 Fungicide_A 50.0 4.6
13 Fungicide_B 47.0 3.5
14 Fungicide_B 48.0 3.6
15 Fungicide_B 47.4 3.4
16 Fungicide_B 47.4 3.8
17 Fungicide_B 49.0 3.9
18 Fungicide_B 50.0 3.7
19 Fungicide_C 55.0 3.0
20 Fungicide_C 51.0 4.0
21 Fungicide_C 52.0 4.0
22 Fungicide_C 56.0 3.4
23 Fungicide_C 61.0 3.5
24 Fungicide_C 53.0 3.0
# Assuming 'test' data is already loaded
# For demonstration, simulate some data
set.seed(123)
test <- data.frame(
  Treatment = rep(c("Control", "Fungicide_A", "Fungicide_B", "Fungicide_C"), each = 20),
  Yield_bu_per_acre = rnorm(80, mean=50, sd=10),
  Severity = rnorm(80, mean=5, sd=2)
)

Plotting a desired plot type (adding layer)

library(ggplot2)
# ` r assigning data`
yield.plot<- ggplot(data=test,
                    mapping= aes(x= Treatment,
                                 y=Yield_bu_per_acre))
yield.plot # <- assigns operator, i.e. values to variables and store in R memory 

# This line of code initializes a ggplot object named yield.plot using the 
# dataset 'test'. It sets up a basic plot where Treatment is on the x-axis 
# and Yield_bu_per_acre is on the y-axis, preparing for further
# graphical layers to be added.

## ` r assigning a shape layer`
(yield.plot<-yield.plot+
    geom_boxplot())

Basic color, making color useful

# View the names of the columns in the dataframe
names(test)
## [1] "Treatment"         "Yield_bu_per_acre" "Severity"
(yield.plot <- yield.plot + 
    geom_boxplot(fill = "purple"))

# pay attention - Treatment assigned to an aesthetics (aes) in R ggplot2
# aes - Aesthetics describe how data should be mapped to visual attributes l
# like color, shape, size, and position
# Frequently used ases = fill, color, size, alpha, x , y, etc.

# Note- I removed the code chunk with error on aes because it hinders Knitting

(yield.plot <- yield.plot + 
    geom_boxplot(aes(fill = Treatment)))

yield.plot

Polishing figure, part one -Shape and caption

# Adding axis title
(yield.plot <- yield.plot + xlab("Treatment Applied"))

(yield.plot <- yield.plot + ylab("Yield (bu/acre)"))

# coming back to business and adding axis titles


(yield.plot <- yield.plot + xlab("Treatment Applied"))

(yield.plot <- yield.plot + ylab("Yield (bu/acre)"))

# updating figure

(yield.plot <- yield.plot + 
    theme_bw(base_size = 14))

# updating figure

(yield.plot <- yield.plot + 
    theme(aspect.ratio = 1.25))

# Pay attentions for repetitive information : remove if not needed 
# Information in the legend is repetitive and not needed.


(yield.plot <- yield.plot + 
    guides(fill = FALSE))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Polishing figure, part two - Color

# Adding colors

# more visually appealing virdis options:magma, inferno, plasma,cividis

yield.ploty <- ggplot(test, 
                      aes(x = Treatment, 
                          y = Yield_bu_per_acre, 
                          fill = Treatment)) +
  geom_boxplot() +
  scale_fill_viridis_d(option = "cividis", direction = -1) +  # Adjust the palette and direction
  ggtitle("Effect of Fungicides on Yield") +
  xlab("Treatment Applied") +
  ylab("Yield (bu/acre)") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none")

yield.ploty

# library(RColorBrewer) for glossy finish

library(RColorBrewer)
yield.plotgloss <- ggplot(test, 
                      aes(x = Treatment, 
                          y = Yield_bu_per_acre, 
                          fill = Treatment)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Effect of Fungicides on Yield") +
  xlab("Treatment Applied") +
  ylab("Yield (bu/acre)") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none") # Optionally hide the legend

yield.plotgloss

Data visulaization, Severity

severity.plot <- ggplot(test, 
                        aes(x = Treatment, y = Severity)) +
  geom_boxplot() +
  ggtitle("Effect of Fungicides on Disease Severity") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.5,
        panel.grid = element_blank()) +
  xlab("Treatment Applied") +
  ylab("Disease Severity")

severity.plot #why it is black and white?? #see that theme is 'bw'

# adjusting text (format text elements)

(severity.plot <- severity.plot + 
    theme(plot.title = element_text(hjust = 0.5)))

#Lets try this to fill inferno color

# To apply a color fill based on the Treatment to your boxplot, 
# you need to add fill = Treatment inside the aes() function.
severity.plot <- ggplot(test, 
                        aes(x = Treatment, y = Severity, fill=Treatment)) +
  geom_boxplot() +
  scale_fill_viridis_d(option = "inferno", direction = -1) +  # Adjust the palette and direction
  ggtitle("Effect of Fungicides on Disease Severity") +
  xlab("Treatment Applied") +
  ylab("Disease Severity") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none")

severity.plot

Data wrapping ‘Yield and Severity’, annotation cleaning

library(cowplot)
combined_plot <- plot_grid(yield.plot, severity.plot, 
                           labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plot

ggsave("combined_plot.png", combined_plot, width = 20, height = 15, dpi = 300)

# see that it has issue with x axis annotation

yield.plot2 <- yield.plot +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"))

severity.plot2 <- severity.plot +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"))



# Combine the plots with adjusted x-axis labels and bold text
combined_plot2 <- plot_grid(yield.plot2, severity.plot2, 
                           labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plot2

# more fun
#######
#https://bioinformatics.ccr.cancer.gov/docs/data-visualization-with-r/Lesson6_V2/#:~:text=An%20extremely%20useful%20feature%20of,package%20that%20creates%20grid%20grobs.
##################################################################################

# much cleaner way to write a code
combined_plot2 <- plot_grid(
  yield.plot2, severity.plot2,
  labels = c('A', 'B'),
  label_size = 10,
  align = 'v' # align vertically
)
combined_plot2

# saving the file locally

ggsave("combined_plot2.png", combined_plot2, width = 20, height = 15, dpi = 300)

Statistical Intergration, Severity and Yield Plot

library (ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
(severity.plot2 <- severity.plot + 
   stat_compare_means(method = "anova", 
                      label.y = 6.5)) #p-value is low, thus data is significant

# however its over all picture we don't know about the pairwise comparisons
# ANOVA gives, bbig picture unless you do a post hoc test.

#likewise for yield data
(yield.plotu2 <- yield.plot2 + stat_compare_means(method = "anova", 
                                                  label.y = 63)) #oops axis messed up (if)

#note label.y is the place where anova will show up, if its wrong it will not be
#visible. Make sure to check as adjust according to y axis
#lets correct y axis

yield.plotu2 <- yield.plot2 +
  stat_compare_means(method = "anova", 
                     label.y = 63) +
  ylim(48, 65) # Set the limits of the y-axis from 48 to 65 if you like

yield.plotu2
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).

Satistical check, alternate method

# a longer way to do the same thing -ANOVA statistics

# dataframe `fungicide` with the variables `Treatment` and `Yield_bu_per_acre`
# Perform ANOVA
anova_result <- aov(Yield_bu_per_acre ~ Treatment, data = test)
anova_summary <- summary(anova_result)

# Extract the p-value
p_value <- anova_summary[[1]]$"Pr(>F)"[1]

# Create the plot
yield.plotu <- ggplot(test, aes(x = Treatment, y = Yield_bu_per_acre)) +
  geom_boxplot() +
  ylim(48, 65) +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25, panel.grid = element_blank())

# If the p-value is significant, add it to the plot
if (p_value < 0.05) {
  yield.plotu <- yield.plotu + 
    annotate("text", x = 1, y = 65, label = paste("ANOVA p-value:", format(p_value, digits = 2)), size = 4)
}

# Print the plot
yield.plotu
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).

Pairwise Staticstics after ANOVA

# pairwise comparisons to evaluate individual effects 

yield.plotu2 <- yield.plot2 +
  stat_compare_means(method = "anova", 
                     label.y = 63) +
  ylim(48, 65) # Set the limits of the y-axis from 48 to 65

yield.plotu2
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).

#lets make a pairwise comparisons
pairwise <- list(c('Control','Fungicide_A'),
                 c('Control','Fungicide_B'),
                 c('Control','Fungicide_C'),
                 c('Fungicide_A','Fungicide_B'),
                 c('Fungicide_A','Fungicide_C'),
                 c('Fungicide_B','Fungicide_C'))

(yield.plotu3 <- yield.plotu2 +
    stat_compare_means(comparisons = pairwise,
                       label = "p.signif",
                       method = 't.test'))
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 42 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 18 rows containing missing values (`geom_signif()`).

yield.plotu3
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 42 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 18 rows containing missing values (`geom_signif()`).

##The Grand Code- Compiled for all sites

# Combining data
# finalizing the whole data for fungicide vs yield for two locations


# Brookings data


yield.plotu <- ggplot(test, 
                      aes(x = Treatment, 
                          y = Yield_bu_per_acre, 
                          fill = Treatment)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Effect of Fungicides on Yield- Brookings") +
  xlab("Treatment Applied") +
  ylab("Yield (bu/acre)") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none") # Optionally hide the legend

yield.plotu <- yield.plotu +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"))

yield.plotu <- yield.plotu +
  stat_compare_means(method = "anova", 
                     label.y = 69)

pairwise <- list(c('Control','Fungicide_A'),
                 c('Control','Fungicide_B'),
                 c('Control','Fungicide_C'),
                 c('Fungicide_A','Fungicide_B'),
                 c('Fungicide_A','Fungicide_C'),
                 c('Fungicide_B','Fungicide_C'))

(yield.plotu <- yield.plotu +
    stat_compare_means(comparisons = pairwise,
                       label = "p.signif",
                       method = 't.test'))

yield.plotu

# Hand county data, just need to change input and output name in code


yield.plots <- ggplot(test2, 
                      aes(x = Treatment, 
                          y = Yield_bu_per_acre, 
                          fill = Treatment)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Effect of Fungicides on Yield-Hand") +
  xlab("Treatment Applied") +
  ylab("Yield (bu/acre)") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none") # Optionally hide the legend

yield.plots <- yield.plots +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"))
yield.plots

yield.plots <- yield.plots +
  stat_compare_means(method = "anova", 
                     label.y = 82)

pairwise2 <- list(c('Control','Fungicide_A'),
                  c('Control','Fungicide_B'),
                  c('Control','Fungicide_C'),
                  c('Fungicide_A','Fungicide_B'),
                  c('Fungicide_A','Fungicide_C'),
                  c('Fungicide_B','Fungicide_C'))

(yield.plots <- yield.plots +
    stat_compare_means(comparisons = pairwise2,
                       label = "p.signif",
                       method = 't.test'))

yield.plots

# check comparison among two:

combined_plottest <- plot_grid(yield.plotu, yield.plots, 
                            labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plottest

# Brown county data


yield.plots2 <- ggplot(test3, 
                       aes(x = Treatment, 
                           y = Yield_bu_per_acre, 
                           fill = Treatment)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Effect of Fungicides on Yield- Brown") +
  xlab("Treatment Applied") +
  ylab("Yield (bu/acre)") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none") # Optionally hide the legend

yield.plots2 <- yield.plots2 +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"))
yield.plots2

yield.plots2 <- yield.plots2 +
  stat_compare_means(method = "anova", 
                     label.y = 60)

pairwise3 <- list(c('Control','Fungicide_A'),
                  c('Control','Fungicide_B'),
                  c('Control','Fungicide_C'),
                  c('Fungicide_A','Fungicide_B'),
                  c('Fungicide_A','Fungicide_C'),
                  c('Fungicide_B','Fungicide_C'))

(yield.plots2 <- yield.plots2 +
    stat_compare_means(comparisons = pairwise3,
                       label = "p.signif",
                       method = 't.test'))

yield.plots2

combined_plotFinal <- plot_grid(
  yield.plotu, yield.plots, yield.plots2,
  labels = c('A', 'B', 'C'),
  label_size = 12,
  align = 'v' # align vertically
  
)


# SDSU data


yield.plots3 <- ggplot(test4, 
                       aes(x = Treatment, 
                           y = Yield_bu_per_acre, 
                           fill = Treatment)) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Set1") +
  ggtitle("Effect of Fungicides on Yield- SDSU") +
  xlab("Treatment Applied") +
  ylab("Yield (bu/acre)") +
  theme_bw(base_size = 14) +
  theme(aspect.ratio = 1.25,
        panel.grid = element_blank(),
        legend.position = "none") # Optionally hide the legend

yield.plots3 <- yield.plots3 +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"))
yield.plots3

yield.plots3 <- yield.plots3 +
  stat_compare_means(method = "anova", 
                     label.y = 60)

pairwise4 <- list(c('Control','Fungicide_A'),
                  c('Control','Fungicide_B'),
                  c('Control','Fungicide_C'),
                  c('Fungicide_A','Fungicide_B'),
                  c('Fungicide_A','Fungicide_C'),
                  c('Fungicide_B','Fungicide_C'))

(yield.plots3 <- yield.plots3 +
    stat_compare_means(comparisons = pairwise4,
                       label = "p.signif",
                       method = 't.test'))

yield.plots3

# check comparison among two:

combined_plottest2 <- plot_grid(yield.plots2, yield.plots3, 
                               labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plottest2

# check comparison among three:

combined_plottest3 <- plot_grid(yield.plotu, yield.plots2, yield.plots3, 
                               labels = c('A', 'B', 'C'), label_size = 10, align = 'v')
combined_plottest3

# check comparison among ALL:

combined_plotFinal <- plot_grid(
  yield.plotu, yield.plots, yield.plots2,yield.plots3,
  labels = c('A', 'B', 'C'),
  label_size = 12,
  align = 'v' # align vertically
  
)


combined_plotFinal 

# Display the combined plot
print(combined_plotFinal)

ggsave("combined_plotFinal.png", combined_plotFinal, width = 20, height = 15, dpi = 300)

What Next?

# see the issue with variable x and y axis, and a bit more____
# Who will fix it ??