Compiling the data from both evapotranspiration (Arduino), RGB and PhenoSight into one narrative to understand the temporal patterns across tomato species in their response to drought streess.
Data was collected by Sofia Trujillo Ortigoza as part of FFAR funded project at Boyce Thompson Institute. Analysis performed by Magda Julkowska.
EVT <- read.csv("Arduino1.csv")
RGB <- read.csv("RGB.csv")
PS <- read.csv("Phenosight.csv")
EVT
RGB
PS
OK So in EVT - I still actually need to calculate EVT - and same for RGB (growth rate), so let’s go:
I need to reshape the data from Arduino - so I can calculate EVT from the weight before (D3) and weight after (D2). So I need to cast this dataset with Day being part of the Before / After collumn.
library(reshape2)
EVTm <- melt(EVT, id = c("Day", "Pots", "Condition", "Accession", "Specie"))
EVTm
EVT_cast <- dcast(EVTm, Pots + Condition + Accession + Specie ~ variable + Day)
EVT_cast
OK - now I have the data in the shape I want - let’s calculate the EVT for each day:
EVT_cast$EVT.2 <- EVT_cast$After_2 - EVT_cast$Before_3
EVT_cast$EVT.3 <- EVT_cast$After_3 - EVT_cast$Before_4
EVT_cast$EVT.4 <- EVT_cast$After_4 - EVT_cast$Before_5
EVT_cast$EVT.5 <- EVT_cast$After_5 - EVT_cast$Before_6
EVT_cast$EVT.6 <- EVT_cast$After_6 - EVT_cast$Before_7
EVT_cast$EVT.7 <- EVT_cast$After_7 - EVT_cast$Before_8
EVT_cast$EVT.8 <- EVT_cast$After_8 - EVT_cast$Before_9
EVT_cast$EVT.9 <- EVT_cast$After_9 - EVT_cast$Before_10
EVT_cast$EVT.10 <- EVT_cast$After_10 - EVT_cast$Before_11
EVT_cast
OK - I can see some negative numbers - which is not right - but what we will do now is ignore them - ill come back to them later.
First - we need to have the data ONLY with the relevant info:
EVT1 <- EVT_cast[,c(1:4, 25:33)]
EVT1
now - I need my time as a column too:
mEVT <- melt(EVT1, id = c("Pots", "Condition", "Accession", "Specie"))
mEVT
colnames(mEVT)[6] <- "EVT"
colnames(mEVT)[5] <- "Day"
mEVT$Day <- gsub("EVT.", "", mEVT$Day)
mEVT$Condition <- gsub("Normal", "Control", mEVT$Condition)
mEVT$Day <- as.numeric(as.character(mEVT$Day))
mEVT
OK - now let’s plot this:
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(ggpubr)
library(ggsci)
## Warning: package 'ggsci' was built under R version 4.3.3
# Convert 'day' column to a factor with numeric levels
mEVT$Day <- as.factor(as.numeric(as.character(mEVT$Day)))
# Create a line graph (Area_lgraph) using ggplot2
EVT_lgraph <- ggplot(data = mEVT, aes(x = Day, y = EVT, group = Pots, color = Condition))
EVT_lgraph <- EVT_lgraph + geom_line(alpha = 0.1)
EVT_lgraph <- EVT_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
EVT_lgraph <- EVT_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
EVT_lgraph <- EVT_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
EVT_lgraph <- EVT_lgraph + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_lgraph
OK - so this is great - but we still have negative values - let’s remove
them:
mEVT2 <- subset(mEVT, mEVT$EVT > 0)
EVT_lgraph <- ggplot(data = mEVT2, aes(x = Day, y = EVT, group = Pots, color = Condition))
EVT_lgraph <- EVT_lgraph + geom_line(alpha = 0.1)
EVT_lgraph <- EVT_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
EVT_lgraph <- EVT_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
EVT_lgraph <- EVT_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
EVT_lgraph <- EVT_lgraph + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_lgraph
OK this is awesome - but the data is still a little choppy - let’s do
the splines:
days <- seq(2, 10, by = 1)
#create a temporary subset that contains only rows where the trayID is the first unique one
temp <- subset(mEVT2, mEVT2$Pots == unique(mEVT2$Pots)[1])
#assign TOE as a numeric val
temp$Day <- as.numeric(as.character(temp$Day))
#plot spline for this first entry
plot.spl <- with(temp, smooth.spline(Day, EVT, df = 4))
plot(EVT ~ Day, data = temp)
#actual vals
lines(plot.spl, col = "blue")
#predicted vals
lines(predict(plot.spl, days), col = "red")
same procedure to the rest of the trays
smooth out graph using spline for the rest of the trays
#names of columns to be used for plot
names <- c("Pots", "Day", "EVT")
spline_data <- data.frame(Pots = character(), Day = numeric(), EVT = numeric(), stringsAsFactors = FALSE)
# Populate spline_data
pred_temp <- predict(plot.spl, days)
#append the first tray to the dataframe
spline_data <- data.frame(Pots = temp$Pots[1], Day = pred_temp$x, EVT = pred_temp$y)
final_spline <- spline_data
#Do the same prediction for all plants
all_plants <- unique(mEVT2$Pots)
for (i in 2:length(all_plants)) {
#iterate foe each based on tray.ID
temp <- subset(mEVT2, mEVT2$Pots == unique(mEVT2$Pots)[i])
temp$Day <- as.numeric(as.character(temp$Day))
if (nrow(temp) > 4) {
#if there are more datapoints that match the correct tray.ID
#use timestamps corresponding to the data from subset
plot.spl <- with(temp, smooth.spline(Day, EVT, df = 4))
pred_temp <- predict(plot.spl, days)
spline_data <- data.frame(Pots = temp$Pots[1], Day = pred_temp$x, EVT = pred_temp$y, stringsAsFactors = FALSE)
} else {
spline_data <- data.frame(
#trayID is corresponding to the first trayID of i subset
Pots = temp$Pots[1],
Day = days,
EVT = rep(0, length(days)),
stringsAsFactors = FALSE
)
}
#append the predictions onto final_spline
final_spline <- rbind(final_spline, spline_data)
}
final_spline
now we have to decode what pot is what accession / species and treatment:
mEVT
decode <- unique(mEVT[,1:4])
decode
EVT_spline <- merge(decode, final_spline, by = "Pots", all = T)
EVT_spline <- na.omit(EVT_spline)
EVT_spline
Now let’s replot the smoothed data:
EVT_sgraph <- ggplot(data = EVT_spline, aes(x = Day, y = EVT, group = Pots, color = Condition))
EVT_sgraph <- EVT_sgraph + geom_line(alpha = 0.1)
EVT_sgraph <- EVT_sgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
EVT_sgraph <- EVT_sgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
EVT_sgraph <- EVT_sgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
EVT_sgraph <- EVT_sgraph + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_sgraph
OK - now let’s split it per genotype too. Before - let’s correct the spelling as per Zangjun Fei’s suggestions:
unique(EVT_spline$Specie)
## [1] "S. lycopersicoides (B)" "S. sitiens" "S. lycopersicoides"
## [4] "S. habrochaites" "S. corneliomulleri" "S. neorickii"
## [7] "S. pimpinellifolium" "S. cheesmaniae"
EVT_spline$Specie <- gsub("S. cheesmaniae", "S. cheesemaniae", EVT_spline$Specie)
EVT_spline$Specie <- gsub("S. corneliomulleri" , "S. corneliomulerii", EVT_spline$Specie)
OK - now let’s replot:
EVT_sgraph2 <- ggplot(data = EVT_spline, aes(x = Day, y = EVT, group = Pots, color = Condition))
EVT_sgraph2 <- EVT_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
EVT_sgraph2 <- EVT_sgraph2 + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
EVT_sgraph2 <- EVT_sgraph2 + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_sgraph2
OK - let’s do the p-value as a graph under the axis:
library(scales)
EVT_spline$Day <- as.numeric(EVT_spline$Day)
EVT_sgraph2 <- ggplot(data = EVT_spline, aes(x = Day, y = EVT, group = Pots, color = Condition))
EVT_sgraph2 <- EVT_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
EVT_sgraph2 <- EVT_sgraph2 + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "top")
EVT_sgraph2
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Function to perform t-test and extract p-value
get_p_value <- function(EVT_spline) {
t.test(EVT ~ Condition, data = EVT_spline)$p.value
}
# Calculate p-values for each genotype and timepoint
p_values <- EVT_spline %>%
group_by(Day, Specie) %>%
summarise(p_value = get_p_value(cur_data())) %>%
ungroup()
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `p_value = get_p_value(cur_data())`.
## ℹ In group 1: `Day = 2` `Specie = "S. cheesemaniae"`.
## Caused by warning:
## ! `cur_data()` was deprecated in dplyr 1.1.0.
## ℹ Please use `pick()` instead.
## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.
# Display the p-values in a table
p_values$LOD <- -log10(p_values$p_value)
p_values
EVT_pplot <- ggplot(p_values, aes(x = Day, y = LOD))
EVT_pplot <- EVT_pplot + geom_line() + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red")
EVT_pplot <- EVT_pplot + facet_wrap(~ Specie, ncol = 8)
EVT_pplot <- EVT_pplot + labs(x = "Days Of Experiment", y = "-log10(p-value)")
EVT_pplot <- EVT_pplot + theme_minimal() + theme_bw() + theme(strip.text = element_blank(), strip.background = element_blank())
EVT_pplot
Now let’s add these two graphs together
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
EVT_plot <- plot_grid(EVT_sgraph2, EVT_pplot, rel_heights = c(4,1), ncol = 1, align = "v", axis = "l")
EVT_plot
OK - let’s save it into PDF before I forget:
pdf("EVT_SmoothSplines_per_species_with_pvalues.pdf", height = 6, width = 12)
plot(EVT_plot)
dev.off()
## quartz_off_screen
## 2
OK - before we move on - I would like to calculate some additional traits here - like AVERAGE EVT and RELATIVE EVT per species.
library(doBy)
## Warning: package 'doBy' was built under R version 4.3.3
##
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
##
## order_by
EVT_sum <- summaryBy(EVT ~ Condition + Accession + Specie + Pots, data = EVT_spline)
EVT_sum
EVT_sum
EVT.mean_rank <- ggerrorplot(EVT_sum, x = "Specie", y = "EVT.mean", color = "Condition",
fill = "Condition", facet.by = "Condition",
desc_stat = "mean_sd", add = "jitter", ncol = 2,
xlab="", ylab= "mean EVT (g H2O / day)", add.params = list(color = "darkgray")) +
scale_color_d3("category10")
EVT.mean_rank <- EVT.mean_rank + stat_compare_means(method = "aov", label.y = 110)
EVT.mean_rank <- EVT.mean_rank + rremove("legend") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
EVT.mean_rank
OK - now let’s calculate the average per condition:
EVT_sum2 <- summaryBy(EVT.mean ~ Condition + Accession + Specie, data = EVT_sum)
EVT_sum2
Looks the same - so that’s fine.
sEVT_melt <- melt(EVT_sum2, id = c("Condition", "Accession", "Specie"))
sEVT_melt
csEVT <- dcast(sEVT_melt, Accession + Specie ~ Condition)
csEVT
csEVT$STI_EVT <- csEVT$Drought / csEVT$Control
csEVT$STI2_EVT <- csEVT$Drought / sqrt(csEVT$Control)
csEVT
EVT_STI <- ggdotchart(csEVT, x = "Specie", y = "STI_EVT",
label = round(csEVT$STI_EVT,2),
ylab = "Relative EVT (fraction of Control)", xlab = "",
sorting = "ascending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
ggtheme = theme_pubr() # ggplot2 theme
)
EVT_STI
scatter_EVT_STI1 <- ggscatter(csEVT, x = "Control", y = "Drought",
color = "STI_EVT",
ylab = "EVT at Drought", xlab = "EVT at Control"
) + gradient_color(c("blue", "red"))
scatter_EVT_STI1
EVT_STI2 <- ggdotchart(csEVT, x = "Specie", y = "STI2_EVT",
label = round(csEVT$STI2_EVT,2),
ylab = "STI2 EVT (a.u.)", xlab = "",
sorting = "ascending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
ggtheme = theme_pubr() # ggplot2 theme
)
EVT_STI2
scatter_EVT_STI2 <- ggscatter(csEVT, x = "Control", y = "Drought",
color = "STI2_EVT",
ylab = "EVT at Drought", xlab = "EVT at Control"
) + gradient_color(c("blue", "red"))
scatter_EVT_STI2
pdf("EVT_Summary_per_Accession.pdf", width = 7, height = 14)
mid_panel <- plot_grid(EVT_STI, scatter_EVT_STI1, ncol = 2, align = "h")
low_panel <- plot_grid(EVT_STI2, scatter_EVT_STI2, ncol = 2, align = "h")
plot_grid(EVT.mean_rank, mid_panel, low_panel, ncol = 1, labels = "AUTO")
dev.off()
## quartz_off_screen
## 2
OK - so the values we would like to keep from this experiment are in csEVT
csEVT
sum_data <- csEVT
colnames(sum_data)[2] <- "Species"
colnames(sum_data)[3] <- "EVT.C"
colnames(sum_data)[4] <- "EVT.D"
colnames(sum_data)[5] <- "EVT.STI1"
colnames(sum_data)[6] <- "EVT.STI2"
sum_data
OK - now let’s do the same thing for RGB data:
RGB
First of all - I need to get digital biomass by adding area of all 7 sides together:
RGB_sum <- summaryBy(area_pixels ~ timestamp + Condition + Accession + Pot, RGB, FUN = sum)
RGB_sum
OK - first we also need to have a day column
# Let's make a Day collumn
for(i in 1:nrow(RGB_sum)){
RGB_sum$Day[i] <- strsplit(RGB_sum$timestamp[i], "-")[[1]][1]
}
unique(RGB_sum$Day)
## [1] "2025.05.12" "2025.05.14" "2025.05.16" "2025.05.19" "2025.05.21"
now - let’s do the numeric rather than date”
RGB_sum$Day <- gsub("2025.05.12", 0, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.14", 2, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.16", 4, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.19", 7, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.21", 9, RGB_sum$Day)
RGB_sum$Day <- as.factor(as.numeric(as.character(RGB_sum$Day)))
RGB_sum$Condition <- gsub("Normal", "Control", RGB_sum$Condition)
RGB_sum
OK - now let’s plot the data
Area_lgraph <- ggplot(data = RGB_sum, aes(x = Day, y = area_pixels.sum, group = Pot, color = Condition))
Area_lgraph <- Area_lgraph + geom_line(alpha = 0.1)
Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
Area_lgraph <- Area_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
Area_lgraph <- Area_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Area_lgraph <- Area_lgraph + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "bottom")
Area_lgraph
Hmm - there are still multiple plants on last day - let’s summarize them
again:
RGB_sum2 <- summaryBy(area_pixels.sum ~ Condition + Accession + Pot + Day, RGB_sum, FUN = sum)
RGB_sum2
Now - let’s replot it again:
Area_lgraph <- ggplot(data = RGB_sum2, aes(x = Day, y = area_pixels.sum.sum, group = Pot, color = Condition))
Area_lgraph <- Area_lgraph + geom_line(alpha = 0.1)
Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
Area_lgraph <- Area_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
Area_lgraph <- Area_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Area_lgraph <- Area_lgraph + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "bottom")
Area_lgraph
OK this is awesome - but the data is still a little choppy - let’s do the splines:
days <- seq(0, 9, by = 1)
#create a temporary subset that contains only rows where the trayID is the first unique one
temp <- subset(RGB_sum2, RGB_sum2$Pot == unique(RGB_sum2$Pot)[1])
#assign TOE as a numeric val
temp$Day <- as.numeric(as.character(temp$Day))
#plot spline for this first entry
plot.spl <- with(temp, smooth.spline(Day, area_pixels.sum.sum, df = 3))
plot(area_pixels.sum.sum ~ Day, data = temp)
#actual vals
lines(plot.spl, col = "blue")
#predicted vals
lines(predict(plot.spl, days), col = "red")
same procedure to the rest of the trays
smooth out graph using spline for the rest of the trays
#names of columns to be used for plot
names <- c("Pots", "Day", "Area")
spline_data <- data.frame(Pots = character(), Day = numeric(), Area = numeric(), stringsAsFactors = FALSE)
# Populate spline_data
pred_temp <- predict(plot.spl, days)
#append the first tray to the dataframe
spline_data <- data.frame(Pots = temp$Pot[1], Day = pred_temp$x, Area = pred_temp$y)
final_spline <- spline_data
#Do the same prediction for all plants
all_plants <- unique(RGB_sum2$Pot)
for (i in 2:length(all_plants)) {
#iterate foe each based on tray.ID
temp <- subset(RGB_sum2, RGB_sum2$Pot == unique(RGB_sum2$Pot)[i])
temp$Day <- as.numeric(as.character(temp$Day))
if (nrow(temp) > 4) {
#if there are more datapoints that match the correct tray.ID
#use timestamps corresponding to the data from subset
plot.spl <- with(temp, smooth.spline(Day, area_pixels.sum.sum, df = 3))
pred_temp <- predict(plot.spl, days)
spline_data <- data.frame(Pots = temp$Pot[1], Day = pred_temp$x, Area = pred_temp$y, stringsAsFactors = FALSE)
} else {
spline_data <- data.frame(
#trayID is corresponding to the first trayID of i subset
Pots = temp$Pot[1],
Day = days,
EVT = rep(0, length(days)),
stringsAsFactors = FALSE
)
}
#append the predictions onto final_spline
final_spline <- rbind(final_spline, spline_data)
}
final_spline
now we have to decode what pot is what accession / species and treatment:
RGB_spline <- merge(decode, final_spline, by = "Pots", all = T)
RGB_spline <- na.omit(RGB_spline)
RGB_spline
Now let’s replot the smoothed data:
RGB_sgraph <- ggplot(data = RGB_spline, aes(x = Day, y = Area, group = Pots, color = Condition))
RGB_sgraph <- RGB_sgraph + geom_line(alpha = 0.1)
RGB_sgraph <- RGB_sgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
RGB_sgraph <- RGB_sgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
RGB_sgraph <- RGB_sgraph + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "bottom")
RGB_sgraph
OK - now let’s split it per genotype too. Before - let’s correct the spelling as per Zangjun Fei’s suggestions:
unique(RGB_spline$Specie)
## [1] "S. lycopersicoides (B)" "S. sitiens" "S. lycopersicoides"
## [4] "S. habrochaites" "S. corneliomulleri" "S. neorickii"
## [7] "S. pimpinellifolium" "S. cheesmaniae"
RGB_spline$Specie <- gsub("S. cheesmaniae", "S. cheesemaniae", RGB_spline$Specie)
RGB_spline$Specie <- gsub("S. corneliomulleri" , "S. corneliomulerii", RGB_spline$Specie)
OK - now let’s replot:
RGB_sgraph2 <- ggplot(data = RGB_spline, aes(x = Day, y = Area, group = Pots, color = Condition))
RGB_sgraph2 <- RGB_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
RGB_sgraph2 <- RGB_sgraph2 + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
RGB_sgraph2 <- RGB_sgraph2 + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "top")
RGB_sgraph2
## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 3.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 5.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 1.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
## Warning: Computation failed in `stat_compare_means()`.
## Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 6.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 5.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 7.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 4.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle
OK - let’s do the p-value as a graph under the axis:
library(scales)
RGB_spline$Day <- as.numeric(RGB_spline$Day)
RGB_sgraph2 <- ggplot(data = RGB_spline, aes(x = Day, y = Area, group = Pots, color = Condition))
RGB_sgraph2 <- RGB_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
RGB_sgraph2 <- RGB_sgraph2 + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "top")
RGB_sgraph2
library("dplyr")
# Function to perform t-test and extract p-value
get_p_value <- function(RGB_spline) {
t.test(Area ~ Condition, data = RGB_spline)$p.value
}
# Calculate p-values for each genotype and timepoint
p_values <- RGB_spline %>%
group_by(Day, Specie) %>%
summarise(p_value = get_p_value(cur_data())) %>%
ungroup()
## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.
# Display the p-values in a table
p_values$LOD <- -log10(p_values$p_value)
p_values
RGB_pplot <- ggplot(p_values, aes(x = Day, y = LOD))
RGB_pplot <- RGB_pplot + geom_line() + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red")
RGB_pplot <- RGB_pplot + facet_wrap(~ Specie, ncol = 8)
RGB_pplot <- RGB_pplot + labs(x = "Days Of Experiment", y = "-log10(p-value)")
RGB_pplot <- RGB_pplot + theme_minimal() + theme_bw() + theme(strip.text = element_blank(), strip.background = element_blank())
RGB_pplot
Now let’s add these two graphs together
library(cowplot)
RGB_plot <- plot_grid(RGB_sgraph2, RGB_pplot, rel_heights = c(4,1), ncol = 1, align = "v", axis = "l")
RGB_plot
OK - let’s save it into PDF before I forget:
pdf("RGB_SmoothSplines_per_species_with_pvalues.pdf", height = 6, width = 12)
plot(RGB_plot)
dev.off()
## quartz_off_screen
## 2
OK - now let’s calculate growth rate for each of the plants that we have imaged:
temp <- subset(RGB_spline, RGB_spline$Pots == unique(RGB_spline$Pots[1]))
temp$Area <- as.numeric(temp$Area)
temp$Day <- as.numeric(temp$Day)
# plot the data
plot(temp$Area ~ temp$Day) +
# Linear regression line is added to the scatterplot using the lm() function
abline(lm(temp$Area ~ temp$Day))
## integer(0)
this looks as if we have some kind of slowing down happening (probably due to occlusions) - let’s calculate the growth rate based on first 6 days - where we have a clear linear growth rate then:
temp <- subset(RGB_spline, RGB_spline$Pots == unique(RGB_spline$Pots[1]))
temp$Area <- as.numeric(temp$Area)
temp$Day <- as.numeric(temp$Day)
temp <- subset(temp, temp$Day < 6)
# plot the data
plot(temp$Area ~ temp$Day) +
# Linear regression line is added to the scatterplot using the lm() function
abline(lm(temp$Area ~ temp$Day))
## integer(0)
OK - looks much better fit to linear function. Let’s put the values of the lm model into a table
# Fit a linear regression model to predict 'length' based on 'hours' in the 'temp' data frame
model <- lm(temp$Area ~ temp$Day)
model
##
## Call:
## lm(formula = temp$Area ~ temp$Day)
##
## Coefficients:
## (Intercept) temp$Day
## 160975 17316
GR <- model$coefficients[2]
GR
## temp$Day
## 17315.99
R2 <- summary(model)$r.squared
R2
## [1] 0.9913218
names <- c(text = "Pots", "Condition", "Species", "GR", "R2")
# Create an empty data frame 'growth_data' to store growth-related data
growth_data <- data.frame()
# Loop through each element 'k' in the 'names' vector
for (k in names) {
# Create a new column in 'growth_data' with the name specified by 'k' and set as character type
growth_data[[k]] <- as.character()
}
temp
# Assign the 'ID' value from the 'temp' data frame to the first row, first column of 'growth_data'
growth_data[1, 1] <- temp$Pots[1]
# Assign the 'Accession' value from the 'temp' data frame to the first row, second column of 'growth_data'
growth_data[1, 2] <- as.character(temp$Condition[1])
# Assign the 'Treatment' value from the 'temp' data frame to the first row, third column of 'growth_data'
growth_data[1, 3] <- temp$Specie[1]
# Assign the growth rate (GR) to the first row, fourth column of 'growth_data'
growth_data[1, 4] <- GR
# Assign the coefficient of determination (R-squared, R2) to the first row, fifth column of 'growth_data'
growth_data[1, 5] <- R2
growth_data
# Create vector 'all_plants' containing unique 'ID' values from 'final_spline'
all_plants <- unique(RGB_spline$Pots)
# Loop through each unique 'ID' in 'all_plants'
for (i in 1:length(all_plants)) {
temp <- subset(RGB_spline, RGB_spline$Pots == all_plants[i])
temp$Area <- as.numeric(temp$Area)
temp$Day <- as.numeric(temp$Day)
temp <- subset(temp, temp$Day < 6)
# Fit a linear regression model to predict 'Area' based on 'day'
model <- lm(temp$Area ~ temp$Day)
# Extract the growth rate (GR) coefficient from the linear regression model
GR <- model$coefficients[2]
# Calculate the coefficient of determination (R-squared, R2) for the model
R2 <- summary(model)$r.squared
# Populate 'growth_data' with relevant information for the current 'ID'
growth_data[i, 1] <- temp$Pots[1]
growth_data[i, 2] <- as.character(temp$Condition[1])
growth_data[i, 3] <- temp$Specie[1]
growth_data[i, 4] <- GR
growth_data[i, 5] <- R2
}
growth_data
Let’s fo a little clean up:
# Convert the 'GR' and 'R2' columns in 'growth_data' to numeric
growth_data$GR <- as.numeric(as.character(growth_data$GR))
growth_data$R2 <- as.numeric(as.character(growth_data$R2))
growth_data_clean <- subset(growth_data, growth_data$R2 > 0.7)
growth_data_clean <- subset(growth_data_clean, growth_data_clean$GR > 0)
growth_data_clean
GR_overall <- ggerrorplot(growth_data_clean, x = "Species", y = "GR", color = "Condition",
fill = "Condition", facet.by = "Condition",
desc_stat = "mean_sd", add = "jitter", ncol = 2,
xlab="", ylab= "Growth Rate (pixel / hour)", add.params = list(color = "darkgray")) +
scale_color_d3("category10")
GR_overall <- GR_overall + stat_compare_means(method = "aov", label.y = 180000)
GR_overall <- GR_overall + rremove("legend") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
GR_overall
OK - now let’s calculate the Stress Tolerance Indices for each accession based on this data:
growth_data_clean
GR_sum <- summaryBy(GR ~ Species + Condition, data = growth_data_clean)
colnames(GR_sum)[3] <- "GR"
GR_sum
csGR <- dcast(GR_sum, Species ~ Condition)
## Using GR as value column: use value.var to override.
csGR
Great - now that we have the data in the right format - let’s calculate two most interesting STIs
csGR$STI_GR <- csGR$Drought / csGR$Control
csGR$STI2_GR <- csGR$Drought / sqrt(csGR$Control)
csGR
GR_STI <- ggdotchart(csGR, x = "Species", y = "STI_GR",
label = round(csGR$STI_GR,2),
ylab = "Relative GR (fraction of Control)", xlab = "",
sorting = "ascending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
ggtheme = theme_pubr() # ggplot2 theme
)
GR_STI
scatter_GR_STI1 <- ggscatter(csGR, x = "Control", y = "Drought",
color = "STI_GR",
ylab = "GR at Drought", xlab = "GR at Control"
) + gradient_color(c("blue", "red"))
scatter_GR_STI1
GR_STI2 <- ggdotchart(csGR, x = "Species", y = "STI2_GR",
label = round(csGR$STI2_GR,2),
ylab = "STI2 GR (a.u.)", xlab = "",
sorting = "ascending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
ggtheme = theme_pubr() # ggplot2 theme
)
GR_STI2
scatter_GR_STI2 <- ggscatter(csGR, x = "Control", y = "Drought",
color = "STI2_GR",
ylab = "GR at Drought", xlab = "GR at Control"
) + gradient_color(c("blue", "red"))
scatter_GR_STI2
pdf("GR_Summary_per_Accession.pdf", width = 7, height = 14)
mid_panel <- plot_grid(GR_STI, scatter_GR_STI1, ncol = 2, align = "h")
low_panel <- plot_grid(GR_STI2, scatter_GR_STI2, ncol = 2, align = "h")
plot_grid(GR_overall, mid_panel, low_panel, ncol = 1, labels = "AUTO")
dev.off()
## quartz_off_screen
## 2
Now - let’s add growth data to the summary data too:
colnames(csGR)[2] <- "GR.C"
colnames(csGR)[3] <- "GR.D"
colnames(csGR)[4] <- "GR.STI1"
colnames(csGR)[5] <- "GR.STI2"
csGR
sum_data
sum_data2 <- merge(sum_data, csGR, by = "Species")
sum_data2
Let’s move on to PS data for now.
PS
OK - so for this I would like to first see how it looks throughout time - so I need to add Day rather than Date
unique(PS$Date)
## [1] 20250514 20250519 20250523
OK - so 14th is Day 2 - 19th is Day 7 - and 23rd is Day 11. Let’s add that info into the table:
PS$Day <- PS$Date
PS$Day <- gsub("20250514", 2, PS$Day)
PS$Day <- gsub("20250519", 7, PS$Day)
PS$Day <- gsub("20250523", 11, PS$Day)
PS
Let’s remove unnecessary info:
PS1 <- PS[,c(1, 22, 4:5,9:10, 12:21)]
PS1$Condition <- gsub("Normal", "Control", PS1$Condition)
PS1$Day <- as.factor(as.numeric(PS1$Day))
PS1
Cool - let’s now have a look at how individual traits change throughout time and drought exposure:
FvFm_lgraph <- ggplot(data = PS1, aes(x = Day, y = Fv.Fm, group = PlantID, color = Condition))
FvFm_lgraph <- FvFm_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
FvFm_lgraph <- FvFm_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
FvFm_lgraph <- FvFm_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
FvFm_lgraph <- FvFm_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
FvFm_lgraph <- FvFm_lgraph + labs(x = "Days of Experiment", y = "Max Quantum Yield (Fv/Fm)") + scale_color_d3("category10") + theme(legend.position = "top")
FvFm_lgraph
FqFm_lgraph <- ggplot(data = PS1, aes(x = Day, y = Fq..Fm., group = PlantID, color = Condition))
FqFm_lgraph <- FqFm_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
FqFm_lgraph <- FqFm_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
FqFm_lgraph <- FqFm_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
FqFm_lgraph <- FqFm_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
FqFm_lgraph <- FqFm_lgraph + labs(x = "Days of Experiment", y = "Quantum Yield (Fv'/Fm')") + scale_color_d3("category10") + theme(legend.position = "top")
FqFm_lgraph
qP_lgraph <- ggplot(data = PS1, aes(x = Day, y = qP, group = PlantID, color = Condition))
qP_lgraph <- qP_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qP_lgraph <- qP_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
qP_lgraph <- qP_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qP_lgraph <- qP_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
qP_lgraph <- qP_lgraph + labs(x = "Days of Experiment", y = "Reaction Open Centers / photochemical quenching puddle mode (qP)") + scale_color_d3("category10") + theme(legend.position = "top")
qP_lgraph
qN_lgraph <- ggplot(data = PS1, aes(x = Day, y = qN, group = PlantID, color = Condition))
qN_lgraph <- qN_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qN_lgraph <- qN_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
qN_lgraph <- qN_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qN_lgraph <- qN_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
qN_lgraph <- qN_lgraph + labs(x = "Days of Experiment", y = "Non-photochemical Quenching (qN)") + scale_color_d3("category10") + theme(legend.position = "top")
qN_lgraph
qL_lgraph <- ggplot(data = PS1, aes(x = Day, y = qL, group = PlantID, color = Condition))
qL_lgraph <- qL_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qL_lgraph <- qL_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
qL_lgraph <- qL_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qL_lgraph <- qL_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
qL_lgraph <- qL_lgraph + labs(x = "Days of Experiment", y = "Reaction Open Centers / photochemical quenching lake mode (qL)") + scale_color_d3("category10") + theme(legend.position = "top")
qL_lgraph
qI_lgraph <- ggplot(data = PS1, aes(x = Day, y = qI, group = PlantID, color = Condition))
qI_lgraph <- qI_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qI_lgraph <- qI_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
qI_lgraph <- qI_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qI_lgraph <- qI_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
qI_lgraph <- qI_lgraph + labs(x = "Days of Experiment", y = "slow relaxing NPQ (qI)") + scale_color_d3("category10") + theme(legend.position = "top")
qI_lgraph
Dno_lgraph <- ggplot(data = PS1, aes(x = Day, y = D.no, group = PlantID, color = Condition))
Dno_lgraph <- Dno_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
Dno_lgraph <- Dno_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
Dno_lgraph <- Dno_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Dno_lgraph <- Dno_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
Dno_lgraph <- Dno_lgraph + labs(x = "Days of Experiment", y = "QY of non-regulated energy dissipation (phiNO)") + scale_color_d3("category10") + theme(legend.position = "top")
Dno_lgraph
Dnpq_lgraph <- ggplot(data = PS1, aes(x = Day, y = D.npq, group = PlantID, color = Condition))
Dnpq_lgraph <- Dnpq_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
Dnpq_lgraph <- Dnpq_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
Dnpq_lgraph <- Dnpq_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Dnpq_lgraph <- Dnpq_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
Dnpq_lgraph <- Dnpq_lgraph + labs(x = "Days of Experiment", y = "QY of NPQ (phiNPQ)") + scale_color_d3("category10") + theme(legend.position = "top")
Dnpq_lgraph
npqt_lgraph <- ggplot(data = PS1, aes(x = Day, y = npq.t., group = PlantID, color = Condition))
npqt_lgraph <- npqt_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
npqt_lgraph <- npqt_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
npqt_lgraph <- npqt_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
npqt_lgraph <- npqt_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
npqt_lgraph <- npqt_lgraph + labs(x = "Days of Experiment", y = "Fast NPQ (npq(t))") + scale_color_d3("category10") + theme(legend.position = "top")
npqt_lgraph
ChlIdx_lgraph <- ggplot(data = PS1, aes(x = Day, y = ChlIdx, group = PlantID, color = Condition))
ChlIdx_lgraph <- ChlIdx_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
ChlIdx_lgraph <- ChlIdx_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
ChlIdx_lgraph <- ChlIdx_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
ChlIdx_lgraph <- ChlIdx_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
ChlIdx_lgraph <- ChlIdx_lgraph + labs(x = "Days of Experiment", y = "Chlorophyll Index (ChlIdx)") + scale_color_d3("category10") + theme(legend.position = "top")
ChlIdx_lgraph
AriIdx_lgraph <- ggplot(data = PS1, aes(x = Day, y = AriIdx, group = PlantID, color = Condition))
AriIdx_lgraph <- AriIdx_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
AriIdx_lgraph <- AriIdx_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
AriIdx_lgraph <- AriIdx_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
AriIdx_lgraph <- AriIdx_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
AriIdx_lgraph <- AriIdx_lgraph + labs(x = "Days of Experiment", y = "Anthocyanin Index (AriIdx)") + scale_color_d3("category10") + theme(legend.position = "top")
AriIdx_lgraph
NDVI_lgraph <- ggplot(data = PS1, aes(x = Day, y = NDVI, group = PlantID, color = Condition))
NDVI_lgraph <- NDVI_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
NDVI_lgraph <- NDVI_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
NDVI_lgraph <- NDVI_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
NDVI_lgraph <- NDVI_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
NDVI_lgraph <- NDVI_lgraph + labs(x = "Days of Experiment", y = "Normalized Difference Vegetation Index (NDVI)") + scale_color_d3("category10") + theme(legend.position = "top")
NDVI_lgraph
npqt_lgraph <- ggplot(data = PS1, aes(x = Day, y = npq.t., group = PlantID, color = Condition))
npqt_lgraph <- npqt_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
npqt_lgraph <- npqt_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3)
npqt_lgraph <- npqt_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
npqt_lgraph <- npqt_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
npqt_lgraph <- npqt_lgraph + labs(x = "Days of Experiment", y = "Fast NPQ (npq(t))") + scale_color_d3("category10") + theme(legend.position = "top")
npqt_lgraph
OK let’s save this into a file:
pdf("PhenoSight_Photosynthesis_per_Species.pdf", width = 12, height = 10)
plot_grid(FvFm_lgraph,
FqFm_lgraph,
npqt_lgraph, ncol = 1, labels = "AUTO"
)
dev.off()
## quartz_off_screen
## 2
pdf("PhenoSight_Pigments_per_Species.pdf", width = 12, height = 10)
plot_grid(ChlIdx_lgraph,
AriIdx_lgraph,
NDVI_lgraph, ncol = 1, labels = "AUTO")
dev.off()
## quartz_off_screen
## 2
OK - so let’s add all of the info to summary file as well - but first let’s reshape it and summarize the data per species:
PS1
sPS1 <- summaryBy(Fv.Fm + Fq..Fm. + qP + qN + qL + qI + D.no + D.npq + npq.t. + ChlIdx + AriIdx + NDVI ~ Specie + Condition + Day, data = PS1)
sPS1
Now - I d like to have all values per day and conditions too:
msPS1 <- melt(sPS1, id = c("Specie", "Condition", "Day"))
msPS1$variable <- gsub(".mean", "", msPS1$variable)
msPS1
now let’s cast it:
csPS1 <- dcast(msPS1, Specie ~ variable + Condition + Day)
csPS1
Are some of these traits correlated? Let’s see:
library(ggcorrplot)
mPS1 <- as.matrix(csPS1[2:73])
corr <- cor(mPS1)
head(corr[, 1:6])
## AriIdx_Control_2 AriIdx_Control_7 AriIdx_Control_11
## AriIdx_Control_2 1.0000000 0.5316530 0.9231154
## AriIdx_Control_7 0.5316530 1.0000000 0.7873112
## AriIdx_Control_11 0.9231154 0.7873112 1.0000000
## AriIdx_Drought_2 0.9603381 0.5719991 0.9336097
## AriIdx_Drought_7 0.6508832 0.7130012 0.7533033
## AriIdx_Drought_11 0.9632113 0.6770105 0.9559352
## AriIdx_Drought_2 AriIdx_Drought_7 AriIdx_Drought_11
## AriIdx_Control_2 0.9603381 0.6508832 0.9632113
## AriIdx_Control_7 0.5719991 0.7130012 0.6770105
## AriIdx_Control_11 0.9336097 0.7533033 0.9559352
## AriIdx_Drought_2 1.0000000 0.7185075 0.9247256
## AriIdx_Drought_7 0.7185075 1.0000000 0.6640674
## AriIdx_Drought_11 0.9247256 0.6640674 1.0000000
p.mat <- cor_pmat(mPS1)
pdf("PhenoSight_Correlation.pdf", width = 18, height = 18)
ggcorrplot(
corr,
p.mat = p.mat,
hc.order = FALSE,
type = "lower",
insig = "blank"
)
dev.off()
## quartz_off_screen
## 2
now let’s calculate the STIs for the few most interesting traits:
AriIdx npq(t) Fv/Fm Fq/Fm
csPS1$AriIdx_STI_2 <-csPS1$AriIdx_Drought_2 / csPS1$AriIdx_Control_2
csPS1$AriIdx_STI_7 <-csPS1$AriIdx_Drought_7 / csPS1$AriIdx_Control_7
csPS1$AriIdx_STI_11 <-csPS1$AriIdx_Drought_11 / csPS1$AriIdx_Control_11
csPS1$npqt_STI_2 <-csPS1$npq.t._Drought_2 / csPS1$npq.t._Control_2
csPS1$npqt_STI_7 <-csPS1$npq.t._Drought_7 / csPS1$npq.t._Control_7
csPS1$npqt_STI_11 <-csPS1$npq.t._Drought_11 / csPS1$npq.t._Control_11
csPS1$FvFm_STI_2 <-csPS1$Fv.Fm_Drought_2 / csPS1$Fv.Fm_Control_2
csPS1$FvFm_STI_7 <-csPS1$Fv.Fm_Drought_7 / csPS1$Fv.Fm_Control_7
csPS1$FvFm_STI_11 <-csPS1$Fv.Fm_Drought_11 / csPS1$Fv.Fm_Control_11
csPS1$FqFm_STI_2 <-csPS1$Fq..Fm._Drought_2 / csPS1$Fq..Fm._Control_2
csPS1$FqFm_STI_7 <-csPS1$Fq..Fm._Drought_7 / csPS1$Fq..Fm._Control_7
csPS1$FqFm_STI_11 <-csPS1$Fq..Fm._Drought_11 / csPS1$Fq..Fm._Control_11
csPS1
OK - now let’s have a look at how STI for AriIdx are changing throughout the experiment for each genotype studied:
temp <- csPS1[,c(1,74:85)]
temp
temp2 <- melt(temp, id = "Specie")
temp2$Day <- temp2$variable
temp2$Day <- gsub("AriIdx_STI_","", temp2$Day)
temp2$Day <- gsub("npqt_STI_","", temp2$Day)
temp2$Day <- gsub("FvFm_STI_","", temp2$Day)
temp2$Day <- gsub("FqFm_STI_","", temp2$Day)
temp2$Day <- as.numeric(temp2$Day)
temp2$variable <- gsub("_2","", temp2$variable)
temp2$variable <- gsub("_7","", temp2$variable)
temp2$variable <- gsub("_11","", temp2$variable)
temp2
library(tidyverse)
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ doBy::order_by() masks dplyr::order_by()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
STI_graph <- ggerrorplot(temp2, x = "Specie", y = "value", color = "Specie", fill = "Specie",
desc_stat = "mean_sd", add = "jitter", ncol = 2, facet.by = "variable",
xlab="", ylab= "STI (D/C)", add.params = list(color = "darkgray"))
STI_graph <- STI_graph + color_palette("aaas") + stat_compare_means(method = "aov") + ylim(0, 1.5)
STI_graph <- STI_graph + rremove("legend") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
STI_graph
Let’s save it to show that for these traits the variation in STI is not
as big as for EVT or GR traits:
pdf("PhenoSight_STI.pdf", width = 10, height = 10)
plot(STI_graph)
dev.off()
## quartz_off_screen
## 2
OK - let’s reduce the PS data to just few relevant values for now:
PS_sum <- summaryBy(value ~ Specie + variable, data = temp2)
PS_sum2 <- dcast(PS_sum, Specie ~ variable)
## Using value.mean as value column: use value.var to override.
colnames(PS_sum2)[1] <- "Species"
unique(PS_sum2$Species)
## [1] "S. cheesmaniae" "S. corneliomulleri" "S. habrochaites"
## [4] "S. lycopersicoides" "S. lycopersicoides (B)" "S. neorickii"
## [7] "S. pimpinellifolium" "S. sitiens"
PS_sum2$Species <- gsub("S. cheesmaniae", "S. cheesemaniae", PS_sum2$Species)
PS_sum2$Species <- gsub("S. corneliomulleri", "S. corneliomulerii", PS_sum2$Species)
PS_sum2
now - let’s merge it with other summary data from EVT and GR:
sum_data3 <- merge(sum_data2, PS_sum2, by = "Species", all = T)
sum_data3
NOW - how do we decide what species is tolerant and what is susceptible to drought:
sum_matrix <- as.matrix(sum_data3[,c(6,10:12)])
rownames(sum_matrix) <- sum_data3$Species
heatmap(sum_matrix, scale = "column")
library("pheatmap")
## Warning: package 'pheatmap' was built under R version 4.3.3
pdf("Accession_ranking_STI_based_three_groups.pdf")
pheatmap(sum_matrix, scale = "column", cutree_rows = 3)
dev.off()
## pdf
## 3