This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
packages <- c(“ggplot2”, “dplyr”, “ggbeeswarm”, “viridisLite”, “viridis”, “RColorBrewer”, “cowplot”, “magick”, “ggpubr”, “readxl”, “knitr”)
new_packages <- packages[!(packages %in% installed.packages()[,“Package”])] if(length(new_packages)) install.packages(new_packages)
library(readxl) library(ggplot2) library(dplyr) library(ggbeeswarm) library(viridisLite) library(viridis) library(RColorBrewer) library(ggpubr) library(cowplot) library(magick) library (knitr)
library(readxl)
library(knitr)
#.Rmd document should be enclosed by three backwards apostrophes ``` (grave accents!).
# These are known as code chunks
# Set working directory to your data folder
# setwd('/path/to/your/data/folder')
setwd ('C:/Users/huma0/Desktop/Data')
# Load all the data files
test <- read_excel("test.xlsx")
## New names:
## * `` -> `...1`
kable(test, digits = 2, caption = "Data from test.xlsx")
| …1 | Treatment | Yield_bu_per_acre | Severity |
|---|---|---|---|
| 1 | Control | 52.0 | 7.0 |
| 2 | Control | 53.2 | 8.0 |
| 3 | Control | 51.8 | 7.0 |
| 4 | Control | 50.0 | 7.0 |
| 5 | Control | 51.0 | 9.0 |
| 6 | Control | 52.7 | 8.5 |
| 7 | Fungicide_A | 52.0 | 5.1 |
| 8 | Fungicide_A | 55.0 | 5.2 |
| 9 | Fungicide_A | 56.6 | 5.0 |
| 10 | Fungicide_A | 56.2 | 5.0 |
| 11 | Fungicide_A | 53.1 | 5.2 |
| 12 | Fungicide_A | 52.4 | 5.1 |
| 13 | Fungicide_B | 59.2 | 3.5 |
| 14 | Fungicide_B | 55.0 | 3.9 |
| 15 | Fungicide_B | 59.1 | 3.4 |
| 16 | Fungicide_B | 53.2 | 4.0 |
| 17 | Fungicide_B | 52.6 | 5.1 |
| 18 | Fungicide_B | 58.0 | 3.7 |
| 19 | Fungicide_C | 61.0 | 3.1 |
| 20 | Fungicide_C | 60.0 | 3.2 |
| 21 | Fungicide_C | 58.0 | 3.4 |
| 22 | Fungicide_C | 59.0 | 3.4 |
| 23 | Fungicide_C | 57.9 | 3.5 |
| 24 | Fungicide_C | 59.0 | 3.3 |
test2 <- read_excel("test2.xlsx")
## New names:
## * `` -> `...1`
kable(test2, digits = 2, caption = "Data from test2.xlsx")
| …1 | Treatment | Yield_bu_per_acre | Severity |
|---|---|---|---|
| 1 | Control | 47.0 | 8.0 |
| 2 | Control | 48.0 | 9.0 |
| 3 | Control | 47.4 | 8.5 |
| 4 | Control | 47.4 | 8.0 |
| 5 | Control | 49.0 | 9.0 |
| 6 | Control | 50.0 | 7.0 |
| 7 | Fungicide_A | 52.4 | 4.6 |
| 8 | Fungicide_A | 52.6 | 4.8 |
| 9 | Fungicide_A | 53.0 | 4.9 |
| 10 | Fungicide_A | 53.0 | 5.0 |
| 11 | Fungicide_A | 53.1 | 4.9 |
| 12 | Fungicide_A | 54.0 | 4.6 |
| 13 | Fungicide_B | 55.0 | 3.5 |
| 14 | Fungicide_B | 55.3 | 3.6 |
| 15 | Fungicide_B | 55.4 | 3.4 |
| 16 | Fungicide_B | 53.2 | 3.8 |
| 17 | Fungicide_B | 53.3 | 3.9 |
| 18 | Fungicide_B | 55.0 | 3.7 |
| 19 | Fungicide_C | 63.0 | 2.0 |
| 20 | Fungicide_C | 64.0 | 2.0 |
| 21 | Fungicide_C | 66.6 | 2.3 |
| 22 | Fungicide_C | 67.0 | 2.5 |
| 23 | Fungicide_C | 68.0 | 2.7 |
| 24 | Fungicide_C | 65.0 | 2.3 |
test3 <- read_excel("test3.xlsx")
## New names:
## * `` -> `...1`
kable(test3, digits = 2, caption = "Data from test3.xlsx")
| …1 | Treatment | Yield_bu_per_acre | Severity |
|---|---|---|---|
| 1 | Control | 47.0 | 8.0 |
| 2 | Control | 48.0 | 9.0 |
| 3 | Control | 47.4 | 8.5 |
| 4 | Control | 47.4 | 8.0 |
| 5 | Control | 49.0 | 9.0 |
| 6 | Control | 50.0 | 7.0 |
| 7 | Fungicide_A | 47.0 | 4.6 |
| 8 | Fungicide_A | 48.0 | 4.8 |
| 9 | Fungicide_A | 47.4 | 4.9 |
| 10 | Fungicide_A | 47.4 | 5.0 |
| 11 | Fungicide_A | 49.0 | 4.9 |
| 12 | Fungicide_A | 50.0 | 4.6 |
| 13 | Fungicide_B | 47.0 | 3.5 |
| 14 | Fungicide_B | 48.0 | 3.6 |
| 15 | Fungicide_B | 47.4 | 3.4 |
| 16 | Fungicide_B | 47.4 | 3.8 |
| 17 | Fungicide_B | 49.0 | 3.9 |
| 18 | Fungicide_B | 50.0 | 3.7 |
| 19 | Fungicide_C | 55.0 | 2.0 |
| 20 | Fungicide_C | 55.3 | 2.0 |
| 21 | Fungicide_C | 55.4 | 2.3 |
| 22 | Fungicide_C | 53.2 | 2.5 |
| 23 | Fungicide_C | 53.3 | 2.7 |
| 24 | Fungicide_C | 55.0 | 2.3 |
test4 <- read_excel("test4.xlsx")
## New names:
## * `` -> `...1`
kable(test4, digits = 2, caption = "Data from test4.xlsx")
| …1 | Treatment | Yield_bu_per_acre | Severity |
|---|---|---|---|
| 1 | Control | 47.0 | 8.0 |
| 2 | Control | 48.0 | 9.0 |
| 3 | Control | 47.4 | 8.5 |
| 4 | Control | 47.4 | 8.0 |
| 5 | Control | 49.0 | 9.0 |
| 6 | Control | 50.0 | 7.0 |
| 7 | Fungicide_A | 47.0 | 4.6 |
| 8 | Fungicide_A | 48.0 | 4.8 |
| 9 | Fungicide_A | 47.4 | 4.9 |
| 10 | Fungicide_A | 47.4 | 5.0 |
| 11 | Fungicide_A | 49.0 | 4.9 |
| 12 | Fungicide_A | 50.0 | 4.6 |
| 13 | Fungicide_B | 47.0 | 3.5 |
| 14 | Fungicide_B | 48.0 | 3.6 |
| 15 | Fungicide_B | 47.4 | 3.4 |
| 16 | Fungicide_B | 47.4 | 3.8 |
| 17 | Fungicide_B | 49.0 | 3.9 |
| 18 | Fungicide_B | 50.0 | 3.7 |
| 19 | Fungicide_C | 55.0 | 3.0 |
| 20 | Fungicide_C | 51.0 | 4.0 |
| 21 | Fungicide_C | 52.0 | 4.0 |
| 22 | Fungicide_C | 56.0 | 3.4 |
| 23 | Fungicide_C | 61.0 | 3.5 |
| 24 | Fungicide_C | 53.0 | 3.0 |
# Assuming 'test' data is already loaded
# For demonstration, simulate some data
set.seed(123)
test <- data.frame(
Treatment = rep(c("Control", "Fungicide_A", "Fungicide_B", "Fungicide_C"), each = 20),
Yield_bu_per_acre = rnorm(80, mean=50, sd=10),
Severity = rnorm(80, mean=5, sd=2)
)
library(ggplot2)
# ` r assigning data`
yield.plot<- ggplot(data=test,
mapping= aes(x= Treatment,
y=Yield_bu_per_acre))
yield.plot # <- assigns operator, i.e. values to variables and store in R memory
# This line of code initializes a ggplot object named yield.plot using the
# dataset 'test'. It sets up a basic plot where Treatment is on the x-axis
# and Yield_bu_per_acre is on the y-axis, preparing for further
# graphical layers to be added.
## ` r assigning a shape layer`
(yield.plot<-yield.plot+
geom_boxplot())
# View the names of the columns in the dataframe
names(test)
## [1] "Treatment" "Yield_bu_per_acre" "Severity"
(yield.plot <- yield.plot +
geom_boxplot(fill = "purple"))
# pay attention - Treatment assigned to an aesthetics (aes) in R ggplot2
# aes - Aesthetics describe how data should be mapped to visual attributes l
# like color, shape, size, and position
# Frequently used ases = fill, color, size, alpha, x , y, etc.
# Note- I removed the code chunk with error on aes because it hinders Knitting
(yield.plot <- yield.plot +
geom_boxplot(aes(fill = Treatment)))
yield.plot
# Adding axis title
(yield.plot <- yield.plot + xlab("Treatment Applied"))
(yield.plot <- yield.plot + ylab("Yield (bu/acre)"))
# coming back to business and adding axis titles
(yield.plot <- yield.plot + xlab("Treatment Applied"))
(yield.plot <- yield.plot + ylab("Yield (bu/acre)"))
# updating figure
(yield.plot <- yield.plot +
theme_bw(base_size = 14))
# updating figure
(yield.plot <- yield.plot +
theme(aspect.ratio = 1.25))
# Pay attentions for repetitive information : remove if not needed
# Information in the legend is repetitive and not needed.
(yield.plot <- yield.plot +
guides(fill = FALSE))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Adding colors
# more visually appealing virdis options:magma, inferno, plasma,cividis
yield.ploty <- ggplot(test,
aes(x = Treatment,
y = Yield_bu_per_acre,
fill = Treatment)) +
geom_boxplot() +
scale_fill_viridis_d(option = "cividis", direction = -1) + # Adjust the palette and direction
ggtitle("Effect of Fungicides on Yield") +
xlab("Treatment Applied") +
ylab("Yield (bu/acre)") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none")
yield.ploty
# library(RColorBrewer) for glossy finish
library(RColorBrewer)
yield.plotgloss <- ggplot(test,
aes(x = Treatment,
y = Yield_bu_per_acre,
fill = Treatment)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
ggtitle("Effect of Fungicides on Yield") +
xlab("Treatment Applied") +
ylab("Yield (bu/acre)") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none") # Optionally hide the legend
yield.plotgloss
severity.plot <- ggplot(test,
aes(x = Treatment, y = Severity)) +
geom_boxplot() +
ggtitle("Effect of Fungicides on Disease Severity") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.5,
panel.grid = element_blank()) +
xlab("Treatment Applied") +
ylab("Disease Severity")
severity.plot #why it is black and white?? #see that theme is 'bw'
# adjusting text (format text elements)
(severity.plot <- severity.plot +
theme(plot.title = element_text(hjust = 0.5)))
#Lets try this to fill inferno color
# To apply a color fill based on the Treatment to your boxplot,
# you need to add fill = Treatment inside the aes() function.
severity.plot <- ggplot(test,
aes(x = Treatment, y = Severity, fill=Treatment)) +
geom_boxplot() +
scale_fill_viridis_d(option = "inferno", direction = -1) + # Adjust the palette and direction
ggtitle("Effect of Fungicides on Disease Severity") +
xlab("Treatment Applied") +
ylab("Disease Severity") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none")
severity.plot
library(cowplot)
combined_plot <- plot_grid(yield.plot, severity.plot,
labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plot
ggsave("combined_plot.png", combined_plot, width = 20, height = 15, dpi = 300)
# see that it has issue with x axis annotation
yield.plot2 <- yield.plot +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"))
severity.plot2 <- severity.plot +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"))
# Combine the plots with adjusted x-axis labels and bold text
combined_plot2 <- plot_grid(yield.plot2, severity.plot2,
labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plot2
# more fun
#######
#https://bioinformatics.ccr.cancer.gov/docs/data-visualization-with-r/Lesson6_V2/#:~:text=An%20extremely%20useful%20feature%20of,package%20that%20creates%20grid%20grobs.
##################################################################################
# much cleaner way to write a code
combined_plot2 <- plot_grid(
yield.plot2, severity.plot2,
labels = c('A', 'B'),
label_size = 10,
align = 'v' # align vertically
)
combined_plot2
# saving the file locally
ggsave("combined_plot2.png", combined_plot2, width = 20, height = 15, dpi = 300)
library (ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
(severity.plot2 <- severity.plot +
stat_compare_means(method = "anova",
label.y = 6.5)) #p-value is low, thus data is significant
# however its over all picture we don't know about the pairwise comparisons
# ANOVA gives, bbig picture unless you do a post hoc test.
#likewise for yield data
(yield.plotu2 <- yield.plot2 + stat_compare_means(method = "anova",
label.y = 63)) #oops axis messed up (if)
#note label.y is the place where anova will show up, if its wrong it will not be
#visible. Make sure to check as adjust according to y axis
#lets correct y axis
yield.plotu2 <- yield.plot2 +
stat_compare_means(method = "anova",
label.y = 63) +
ylim(48, 65) # Set the limits of the y-axis from 48 to 65 if you like
yield.plotu2
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).
# a longer way to do the same thing -ANOVA statistics
# dataframe `fungicide` with the variables `Treatment` and `Yield_bu_per_acre`
# Perform ANOVA
anova_result <- aov(Yield_bu_per_acre ~ Treatment, data = test)
anova_summary <- summary(anova_result)
# Extract the p-value
p_value <- anova_summary[[1]]$"Pr(>F)"[1]
# Create the plot
yield.plotu <- ggplot(test, aes(x = Treatment, y = Yield_bu_per_acre)) +
geom_boxplot() +
ylim(48, 65) +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25, panel.grid = element_blank())
# If the p-value is significant, add it to the plot
if (p_value < 0.05) {
yield.plotu <- yield.plotu +
annotate("text", x = 1, y = 65, label = paste("ANOVA p-value:", format(p_value, digits = 2)), size = 4)
}
# Print the plot
yield.plotu
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
# pairwise comparisons to evaluate individual effects
yield.plotu2 <- yield.plot2 +
stat_compare_means(method = "anova",
label.y = 63) +
ylim(48, 65) # Set the limits of the y-axis from 48 to 65
yield.plotu2
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).
#lets make a pairwise comparisons
pairwise <- list(c('Control','Fungicide_A'),
c('Control','Fungicide_B'),
c('Control','Fungicide_C'),
c('Fungicide_A','Fungicide_B'),
c('Fungicide_A','Fungicide_C'),
c('Fungicide_B','Fungicide_C'))
(yield.plotu3 <- yield.plotu2 +
stat_compare_means(comparisons = pairwise,
label = "p.signif",
method = 't.test'))
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 42 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 18 rows containing missing values (`geom_signif()`).
yield.plotu3
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 42 rows containing non-finite values (`stat_signif()`).
## Warning: Removed 18 rows containing missing values (`geom_signif()`).
##The Grand Code- Compiled for all sites
# Combining data
# finalizing the whole data for fungicide vs yield for two locations
# Brookings data
yield.plotu <- ggplot(test,
aes(x = Treatment,
y = Yield_bu_per_acre,
fill = Treatment)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
ggtitle("Effect of Fungicides on Yield- Brookings") +
xlab("Treatment Applied") +
ylab("Yield (bu/acre)") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none") # Optionally hide the legend
yield.plotu <- yield.plotu +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"))
yield.plotu <- yield.plotu +
stat_compare_means(method = "anova",
label.y = 69)
pairwise <- list(c('Control','Fungicide_A'),
c('Control','Fungicide_B'),
c('Control','Fungicide_C'),
c('Fungicide_A','Fungicide_B'),
c('Fungicide_A','Fungicide_C'),
c('Fungicide_B','Fungicide_C'))
(yield.plotu <- yield.plotu +
stat_compare_means(comparisons = pairwise,
label = "p.signif",
method = 't.test'))
yield.plotu
# Hand county data, just need to change input and output name in code
yield.plots <- ggplot(test2,
aes(x = Treatment,
y = Yield_bu_per_acre,
fill = Treatment)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
ggtitle("Effect of Fungicides on Yield-Hand") +
xlab("Treatment Applied") +
ylab("Yield (bu/acre)") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none") # Optionally hide the legend
yield.plots <- yield.plots +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"))
yield.plots
yield.plots <- yield.plots +
stat_compare_means(method = "anova",
label.y = 82)
pairwise2 <- list(c('Control','Fungicide_A'),
c('Control','Fungicide_B'),
c('Control','Fungicide_C'),
c('Fungicide_A','Fungicide_B'),
c('Fungicide_A','Fungicide_C'),
c('Fungicide_B','Fungicide_C'))
(yield.plots <- yield.plots +
stat_compare_means(comparisons = pairwise2,
label = "p.signif",
method = 't.test'))
yield.plots
# check comparison among two:
combined_plottest <- plot_grid(yield.plotu, yield.plots,
labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plottest
# Brown county data
yield.plots2 <- ggplot(test3,
aes(x = Treatment,
y = Yield_bu_per_acre,
fill = Treatment)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
ggtitle("Effect of Fungicides on Yield- Brown") +
xlab("Treatment Applied") +
ylab("Yield (bu/acre)") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none") # Optionally hide the legend
yield.plots2 <- yield.plots2 +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"))
yield.plots2
yield.plots2 <- yield.plots2 +
stat_compare_means(method = "anova",
label.y = 60)
pairwise3 <- list(c('Control','Fungicide_A'),
c('Control','Fungicide_B'),
c('Control','Fungicide_C'),
c('Fungicide_A','Fungicide_B'),
c('Fungicide_A','Fungicide_C'),
c('Fungicide_B','Fungicide_C'))
(yield.plots2 <- yield.plots2 +
stat_compare_means(comparisons = pairwise3,
label = "p.signif",
method = 't.test'))
yield.plots2
combined_plotFinal <- plot_grid(
yield.plotu, yield.plots, yield.plots2,
labels = c('A', 'B', 'C'),
label_size = 12,
align = 'v' # align vertically
)
# SDSU data
yield.plots3 <- ggplot(test4,
aes(x = Treatment,
y = Yield_bu_per_acre,
fill = Treatment)) +
geom_boxplot() +
scale_fill_brewer(palette = "Set1") +
ggtitle("Effect of Fungicides on Yield- SDSU") +
xlab("Treatment Applied") +
ylab("Yield (bu/acre)") +
theme_bw(base_size = 14) +
theme(aspect.ratio = 1.25,
panel.grid = element_blank(),
legend.position = "none") # Optionally hide the legend
yield.plots3 <- yield.plots3 +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"))
yield.plots3
yield.plots3 <- yield.plots3 +
stat_compare_means(method = "anova",
label.y = 60)
pairwise4 <- list(c('Control','Fungicide_A'),
c('Control','Fungicide_B'),
c('Control','Fungicide_C'),
c('Fungicide_A','Fungicide_B'),
c('Fungicide_A','Fungicide_C'),
c('Fungicide_B','Fungicide_C'))
(yield.plots3 <- yield.plots3 +
stat_compare_means(comparisons = pairwise4,
label = "p.signif",
method = 't.test'))
yield.plots3
# check comparison among two:
combined_plottest2 <- plot_grid(yield.plots2, yield.plots3,
labels = c('A', 'B'), label_size = 10, align = 'v')
combined_plottest2
# check comparison among three:
combined_plottest3 <- plot_grid(yield.plotu, yield.plots2, yield.plots3,
labels = c('A', 'B', 'C'), label_size = 10, align = 'v')
combined_plottest3
# check comparison among ALL:
combined_plotFinal <- plot_grid(
yield.plotu, yield.plots, yield.plots2,yield.plots3,
labels = c('A', 'B', 'C'),
label_size = 12,
align = 'v' # align vertically
)
combined_plotFinal
# Display the combined plot
print(combined_plotFinal)
ggsave("combined_plotFinal.png", combined_plotFinal, width = 20, height = 15, dpi = 300)
# see the issue with variable x and y axis, and a bit more____
# Who will fix it ??