Compiling the data from both evapotranspiration (Arduino), RGB and PhenoSight into one narrative to understand the temporal patterns across tomato species in their response to drought streess.

Data was collected by Sofia Trujillo Ortigoza as part of FFAR funded project at Boyce Thompson Institute. Analysis performed by Magda Julkowska.

load data

EVT <- read.csv("Arduino1.csv")
RGB <- read.csv("RGB.csv")
PS <- read.csv("Phenosight.csv")

EVT

RGB

PS

OK So in EVT - I still actually need to calculate EVT - and same for RGB (growth rate), so let’s go:

Calculating EVT

I need to reshape the data from Arduino - so I can calculate EVT from the weight before (D3) and weight after (D2). So I need to cast this dataset with Day being part of the Before / After collumn.

library(reshape2)
EVTm <- melt(EVT, id = c("Day", "Pots", "Condition", "Accession", "Specie"))
EVTm

EVT_cast <- dcast(EVTm, Pots + Condition + Accession + Specie ~ variable + Day)
EVT_cast

OK - now I have the data in the shape I want - let’s calculate the EVT for each day:

EVT_cast$EVT.2 <- EVT_cast$After_2 - EVT_cast$Before_3
EVT_cast$EVT.3 <- EVT_cast$After_3 - EVT_cast$Before_4
EVT_cast$EVT.4 <- EVT_cast$After_4 - EVT_cast$Before_5
EVT_cast$EVT.5 <- EVT_cast$After_5 - EVT_cast$Before_6
EVT_cast$EVT.6 <- EVT_cast$After_6 - EVT_cast$Before_7
EVT_cast$EVT.7 <- EVT_cast$After_7 - EVT_cast$Before_8
EVT_cast$EVT.8 <- EVT_cast$After_8 - EVT_cast$Before_9
EVT_cast$EVT.9 <- EVT_cast$After_9 - EVT_cast$Before_10
EVT_cast$EVT.10 <- EVT_cast$After_10 - EVT_cast$Before_11
EVT_cast

OK - I can see some negative numbers - which is not right - but what we will do now is ignore them - ill come back to them later.

First - we need to have the data ONLY with the relevant info:

EVT1 <- EVT_cast[,c(1:4, 25:33)]
EVT1

now - I need my time as a column too:

mEVT <- melt(EVT1, id = c("Pots", "Condition", "Accession", "Specie"))
mEVT

colnames(mEVT)[6] <- "EVT"
colnames(mEVT)[5] <- "Day"
mEVT$Day <- gsub("EVT.", "", mEVT$Day)
mEVT$Condition <- gsub("Normal", "Control", mEVT$Condition)
mEVT$Day <- as.numeric(as.character(mEVT$Day)) 
mEVT

OK - now let’s plot this:

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.3

library(ggpubr)
library(ggsci)

## Warning: package 'ggsci' was built under R version 4.3.3

# Convert 'day' column to a factor with numeric levels
mEVT$Day <- as.factor(as.numeric(as.character(mEVT$Day)))

# Create a line graph (Area_lgraph) using ggplot2
EVT_lgraph <- ggplot(data = mEVT, aes(x = Day, y = EVT, group = Pots, color = Condition)) 
EVT_lgraph <- EVT_lgraph + geom_line(alpha = 0.1)
EVT_lgraph <- EVT_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
EVT_lgraph <- EVT_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

EVT_lgraph <- EVT_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F) 
EVT_lgraph <- EVT_lgraph + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_lgraph

OK - so this is great - but we still have negative values - let’s remove them:

mEVT2 <- subset(mEVT, mEVT$EVT > 0)

EVT_lgraph <- ggplot(data = mEVT2, aes(x = Day, y = EVT, group = Pots, color = Condition)) 
EVT_lgraph <- EVT_lgraph + geom_line(alpha = 0.1)
EVT_lgraph <- EVT_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
EVT_lgraph <- EVT_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
EVT_lgraph <- EVT_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F) 
EVT_lgraph <- EVT_lgraph + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_lgraph

OK this is awesome - but the data is still a little choppy - let’s do the splines:

splines EVT

days <- seq(2, 10, by = 1)
#create a temporary subset that contains only rows where the trayID is the first unique one
temp <- subset(mEVT2, mEVT2$Pots == unique(mEVT2$Pots)[1])
#assign TOE as a numeric val
temp$Day <- as.numeric(as.character(temp$Day))
#plot spline for this first entry 
plot.spl <- with(temp, smooth.spline(Day, EVT, df = 4))

plot(EVT ~ Day, data = temp)

#actual vals
lines(plot.spl, col = "blue")
#predicted vals
lines(predict(plot.spl, days), col = "red")

same procedure to the rest of the trays

smooth out graph using spline for the rest of the trays

#names of columns to be used for plot 
names <- c("Pots", "Day", "EVT")
spline_data <- data.frame(Pots = character(), Day = numeric(), EVT = numeric(), stringsAsFactors = FALSE)

# Populate spline_data
pred_temp <- predict(plot.spl, days)
#append the first tray to the dataframe
spline_data <- data.frame(Pots = temp$Pots[1], Day = pred_temp$x, EVT = pred_temp$y)
final_spline <- spline_data

#Do the same prediction for all plants
all_plants <- unique(mEVT2$Pots)

for (i in 2:length(all_plants)) {
  #iterate foe each based on tray.ID
  temp <- subset(mEVT2, mEVT2$Pots == unique(mEVT2$Pots)[i])
  temp$Day <- as.numeric(as.character(temp$Day))
  if (nrow(temp) > 4) {
    #if there are more datapoints that match the correct tray.ID
    #use timestamps corresponding to the data from subset 
    plot.spl <- with(temp, smooth.spline(Day, EVT, df = 4))
    pred_temp <- predict(plot.spl, days)
   
    spline_data <- data.frame(Pots = temp$Pots[1], Day = pred_temp$x, EVT = pred_temp$y, stringsAsFactors = FALSE)
  } else {
    spline_data <- data.frame(
      #trayID is corresponding to the first trayID of i subset
      Pots = temp$Pots[1],
      Day = days,
      EVT = rep(0, length(days)),
      stringsAsFactors = FALSE
    )
  }
  #append the predictions onto final_spline
  final_spline <- rbind(final_spline, spline_data)
}

final_spline

now we have to decode what pot is what accession / species and treatment:

mEVT

decode <- unique(mEVT[,1:4])
decode

EVT_spline <- merge(decode, final_spline, by = "Pots", all = T)
EVT_spline <- na.omit(EVT_spline)
EVT_spline

Now let’s replot the smoothed data:

EVT_sgraph <- ggplot(data = EVT_spline, aes(x = Day, y = EVT, group = Pots, color = Condition)) 
EVT_sgraph <- EVT_sgraph + geom_line(alpha = 0.1)
EVT_sgraph <- EVT_sgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
EVT_sgraph <- EVT_sgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
EVT_sgraph <- EVT_sgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F) 
EVT_sgraph <- EVT_sgraph + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_sgraph

OK - now let’s split it per genotype too. Before - let’s correct the spelling as per Zangjun Fei’s suggestions:

unique(EVT_spline$Specie)

## [1] "S. lycopersicoides (B)" "S. sitiens"             "S. lycopersicoides"    
## [4] "S. habrochaites"        "S. corneliomulleri"     "S. neorickii"          
## [7] "S. pimpinellifolium"    "S. cheesmaniae"

EVT_spline$Specie <- gsub("S. cheesmaniae", "S. cheesemaniae", EVT_spline$Specie)
EVT_spline$Specie <- gsub("S. corneliomulleri" , "S. corneliomulerii", EVT_spline$Specie)

OK - now let’s replot:

EVT_sgraph2 <- ggplot(data = EVT_spline, aes(x = Day, y = EVT, group = Pots, color = Condition)) 
EVT_sgraph2 <- EVT_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
EVT_sgraph2 <- EVT_sgraph2 + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F) 
EVT_sgraph2 <- EVT_sgraph2 + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "bottom")
EVT_sgraph2

OK - let’s do the p-value as a graph under the axis:

library(scales)

EVT_spline$Day <- as.numeric(EVT_spline$Day)

EVT_sgraph2 <- ggplot(data = EVT_spline, aes(x = Day, y = EVT, group = Pots, color = Condition)) 
EVT_sgraph2 <- EVT_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
EVT_sgraph2 <- EVT_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
EVT_sgraph2 <- EVT_sgraph2 + labs(x = "Days of Experiment", y = "Evapotranspiration (g of H2O)") + scale_color_d3("category10") + theme(legend.position = "top")
EVT_sgraph2

library("dplyr")

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Function to perform t-test and extract p-value
get_p_value <- function(EVT_spline) {
  t.test(EVT ~ Condition, data = EVT_spline)$p.value
}

# Calculate p-values for each genotype and timepoint
p_values <- EVT_spline %>%
  group_by(Day, Specie) %>%
  summarise(p_value = get_p_value(cur_data())) %>%
  ungroup()

## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `p_value = get_p_value(cur_data())`.
## ℹ In group 1: `Day = 2` `Specie = "S. cheesemaniae"`.
## Caused by warning:
## ! `cur_data()` was deprecated in dplyr 1.1.0.
## ℹ Please use `pick()` instead.

## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.

# Display the p-values in a table
p_values$LOD <- -log10(p_values$p_value)
p_values

EVT_pplot <- ggplot(p_values, aes(x = Day, y = LOD)) 
EVT_pplot <- EVT_pplot + geom_line() + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red") 
EVT_pplot <- EVT_pplot + facet_wrap(~ Specie, ncol = 8)
EVT_pplot <- EVT_pplot + labs(x = "Days Of Experiment", y = "-log10(p-value)") 
EVT_pplot <- EVT_pplot + theme_minimal() + theme_bw() + theme(strip.text = element_blank(), strip.background = element_blank()) 
EVT_pplot

Now let’s add these two graphs together

library(cowplot)

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggpubr':
## 
##     get_legend

EVT_plot <- plot_grid(EVT_sgraph2, EVT_pplot, rel_heights = c(4,1), ncol = 1, align = "v", axis = "l")
EVT_plot

OK - let’s save it into PDF before I forget:

pdf("EVT_SmoothSplines_per_species_with_pvalues.pdf", height = 6, width = 12)
plot(EVT_plot)
dev.off()

## quartz_off_screen 
##                 2

Summary stats EVT

OK - before we move on - I would like to calculate some additional traits here - like AVERAGE EVT and RELATIVE EVT per species.

library(doBy)

## Warning: package 'doBy' was built under R version 4.3.3

## 
## Attaching package: 'doBy'

## The following object is masked from 'package:dplyr':
## 
##     order_by

EVT_sum <- summaryBy(EVT ~ Condition + Accession + Specie + Pots, data = EVT_spline)
EVT_sum

EVT_sum

EVT.mean_rank <- ggerrorplot(EVT_sum, x = "Specie", y = "EVT.mean", color = "Condition", 
                          fill = "Condition", facet.by = "Condition",
                          desc_stat = "mean_sd", add = "jitter", ncol = 2,
                          xlab="", ylab= "mean EVT (g H2O / day)", add.params = list(color = "darkgray")) +
                          scale_color_d3("category10") 
EVT.mean_rank <- EVT.mean_rank + stat_compare_means(method = "aov", label.y = 110)
EVT.mean_rank <- EVT.mean_rank + rremove("legend") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
EVT.mean_rank

OK - now let’s calculate the average per condition:

EVT_sum2 <- summaryBy(EVT.mean ~ Condition + Accession + Specie, data = EVT_sum)
EVT_sum2

Looks the same - so that’s fine.

sEVT_melt <- melt(EVT_sum2, id = c("Condition", "Accession", "Specie"))
sEVT_melt

csEVT <- dcast(sEVT_melt, Accession + Specie ~ Condition)
csEVT

csEVT$STI_EVT <- csEVT$Drought / csEVT$Control
csEVT$STI2_EVT <- csEVT$Drought / sqrt(csEVT$Control)
csEVT

EVT_STI <- ggdotchart(csEVT, x = "Specie", y = "STI_EVT",
           label = round(csEVT$STI_EVT,2),   
           ylab = "Relative EVT (fraction of Control)", xlab = "",
           sorting = "ascending",                        # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           ggtheme = theme_pubr()                        # ggplot2 theme
           )
EVT_STI

scatter_EVT_STI1 <- ggscatter(csEVT, x = "Control", y = "Drought", 
                              color = "STI_EVT",
                              ylab = "EVT at Drought", xlab = "EVT at Control"
                              ) + gradient_color(c("blue", "red"))
scatter_EVT_STI1

EVT_STI2 <- ggdotchart(csEVT, x = "Specie", y = "STI2_EVT",
           label = round(csEVT$STI2_EVT,2),   
           ylab = "STI2 EVT (a.u.)", xlab = "",
           sorting = "ascending",                        # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           ggtheme = theme_pubr()                        # ggplot2 theme
           )
EVT_STI2

scatter_EVT_STI2 <- ggscatter(csEVT, x = "Control", y = "Drought", 
                              color = "STI2_EVT",
                              ylab = "EVT at Drought", xlab = "EVT at Control"
                              ) + gradient_color(c("blue", "red"))
scatter_EVT_STI2

pdf("EVT_Summary_per_Accession.pdf", width = 7, height = 14)
mid_panel <- plot_grid(EVT_STI, scatter_EVT_STI1, ncol = 2, align = "h")
low_panel <- plot_grid(EVT_STI2, scatter_EVT_STI2, ncol = 2, align = "h")
plot_grid(EVT.mean_rank, mid_panel, low_panel,  ncol = 1, labels = "AUTO")
dev.off()

## quartz_off_screen 
##                 2

OK - so the values we would like to keep from this experiment are in csEVT

csEVT

sum_data <- csEVT
colnames(sum_data)[2] <- "Species"
colnames(sum_data)[3] <- "EVT.C"
colnames(sum_data)[4] <- "EVT.D"
colnames(sum_data)[5] <- "EVT.STI1"
colnames(sum_data)[6] <- "EVT.STI2"
sum_data

RGB data re-calculations

OK - now let’s do the same thing for RGB data:

RGB

First of all - I need to get digital biomass by adding area of all 7 sides together:

RGB_sum <- summaryBy(area_pixels ~ timestamp + Condition + Accession + Pot, RGB, FUN = sum)
RGB_sum

OK - first we also need to have a day column

# Let's make a Day collumn
for(i in 1:nrow(RGB_sum)){
  RGB_sum$Day[i] <- strsplit(RGB_sum$timestamp[i], "-")[[1]][1]
}
unique(RGB_sum$Day)

## [1] "2025.05.12" "2025.05.14" "2025.05.16" "2025.05.19" "2025.05.21"

now - let’s do the numeric rather than date”

RGB_sum$Day <- gsub("2025.05.12", 0, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.14", 2, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.16", 4, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.19", 7, RGB_sum$Day)
RGB_sum$Day <- gsub("2025.05.21", 9, RGB_sum$Day)
RGB_sum$Day <- as.factor(as.numeric(as.character(RGB_sum$Day)))
RGB_sum$Condition <- gsub("Normal", "Control", RGB_sum$Condition)
RGB_sum

OK - now let’s plot the data

Area_lgraph <- ggplot(data = RGB_sum, aes(x = Day, y = area_pixels.sum, group = Pot, color = Condition)) 
Area_lgraph <- Area_lgraph + geom_line(alpha = 0.1)
Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
Area_lgraph <- Area_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
Area_lgraph <- Area_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Area_lgraph <- Area_lgraph + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "bottom")
Area_lgraph

Hmm - there are still multiple plants on last day - let’s summarize them again:

RGB_sum2 <- summaryBy(area_pixels.sum ~ Condition + Accession + Pot + Day, RGB_sum, FUN = sum)
RGB_sum2

Now - let’s replot it again:

Area_lgraph <- ggplot(data = RGB_sum2, aes(x = Day, y = area_pixels.sum.sum, group = Pot, color = Condition)) 
Area_lgraph <- Area_lgraph + geom_line(alpha = 0.1)
Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
Area_lgraph <- Area_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
Area_lgraph <- Area_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Area_lgraph <- Area_lgraph + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "bottom")
Area_lgraph

OK this is awesome - but the data is still a little choppy - let’s do the splines:

splines RGB

days <- seq(0, 9, by = 1)
#create a temporary subset that contains only rows where the trayID is the first unique one
temp <- subset(RGB_sum2, RGB_sum2$Pot == unique(RGB_sum2$Pot)[1])
#assign TOE as a numeric val
temp$Day <- as.numeric(as.character(temp$Day))
#plot spline for this first entry 
plot.spl <- with(temp, smooth.spline(Day, area_pixels.sum.sum, df = 3))

plot(area_pixels.sum.sum ~ Day, data = temp)

#actual vals
lines(plot.spl, col = "blue")
#predicted vals
lines(predict(plot.spl, days), col = "red")

same procedure to the rest of the trays

smooth out graph using spline for the rest of the trays

#names of columns to be used for plot 
names <- c("Pots", "Day", "Area")
spline_data <- data.frame(Pots = character(), Day = numeric(), Area = numeric(), stringsAsFactors = FALSE)

# Populate spline_data
pred_temp <- predict(plot.spl, days)
#append the first tray to the dataframe
spline_data <- data.frame(Pots = temp$Pot[1], Day = pred_temp$x, Area = pred_temp$y)
final_spline <- spline_data

#Do the same prediction for all plants
all_plants <- unique(RGB_sum2$Pot)

for (i in 2:length(all_plants)) {
  #iterate foe each based on tray.ID
  temp <- subset(RGB_sum2, RGB_sum2$Pot == unique(RGB_sum2$Pot)[i])
  temp$Day <- as.numeric(as.character(temp$Day))
  if (nrow(temp) > 4) {
    #if there are more datapoints that match the correct tray.ID
    #use timestamps corresponding to the data from subset 
    plot.spl <- with(temp, smooth.spline(Day, area_pixels.sum.sum, df = 3))
    pred_temp <- predict(plot.spl, days)
   
    spline_data <- data.frame(Pots = temp$Pot[1], Day = pred_temp$x, Area = pred_temp$y, stringsAsFactors = FALSE)
  } else {
    spline_data <- data.frame(
      #trayID is corresponding to the first trayID of i subset
      Pots = temp$Pot[1],
      Day = days,
      EVT = rep(0, length(days)),
      stringsAsFactors = FALSE
    )
  }
  #append the predictions onto final_spline
  final_spline <- rbind(final_spline, spline_data)
}

final_spline

now we have to decode what pot is what accession / species and treatment:

RGB_spline <- merge(decode, final_spline, by = "Pots", all = T)
RGB_spline <- na.omit(RGB_spline)
RGB_spline

Now let’s replot the smoothed data:

RGB_sgraph <- ggplot(data = RGB_spline, aes(x = Day, y = Area, group = Pots, color = Condition)) 
RGB_sgraph <- RGB_sgraph + geom_line(alpha = 0.1)
RGB_sgraph <- RGB_sgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
RGB_sgraph <- RGB_sgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
RGB_sgraph <- RGB_sgraph + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "bottom")
RGB_sgraph

OK - now let’s split it per genotype too. Before - let’s correct the spelling as per Zangjun Fei’s suggestions:

unique(RGB_spline$Specie)

## [1] "S. lycopersicoides (B)" "S. sitiens"             "S. lycopersicoides"    
## [4] "S. habrochaites"        "S. corneliomulleri"     "S. neorickii"          
## [7] "S. pimpinellifolium"    "S. cheesmaniae"

RGB_spline$Specie <- gsub("S. cheesmaniae", "S. cheesemaniae", RGB_spline$Specie)
RGB_spline$Specie <- gsub("S. corneliomulleri" , "S. corneliomulerii", RGB_spline$Specie)

OK - now let’s replot:

RGB_sgraph2 <- ggplot(data = RGB_spline, aes(x = Day, y = Area, group = Pots, color = Condition)) 
RGB_sgraph2 <- RGB_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
RGB_sgraph2 <- RGB_sgraph2 + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F) 
RGB_sgraph2 <- RGB_sgraph2 + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "top")
RGB_sgraph2

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 3.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 5.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 1.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

## Warning: Computation failed in `stat_compare_means()`.
## Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 6.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 5.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 7.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

## Warning: Computation failed in `stat_compare_means()`.
## Caused by error in `purrr::map()`:
## ℹ In index: 4.
## Caused by error in `.check_npc_coord()`:
## ! '*.npc coord for x axis should be either a numeric value in [0-1] or a character strings including one of right, left, center, centre, middle

OK - let’s do the p-value as a graph under the axis:

library(scales)

RGB_spline$Day <- as.numeric(RGB_spline$Day)

RGB_sgraph2 <- ggplot(data = RGB_spline, aes(x = Day, y = Area, group = Pots, color = Condition)) 
RGB_sgraph2 <- RGB_sgraph2 + geom_line(alpha = 0.1) + facet_grid(~Specie)
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
RGB_sgraph2 <- RGB_sgraph2 + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
RGB_sgraph2 <- RGB_sgraph2 + labs(x = "Days of Experiment", y = "Digital Biomass (a.u.)") + scale_color_d3("category10") + theme(legend.position = "top")
RGB_sgraph2

library("dplyr")
# Function to perform t-test and extract p-value
get_p_value <- function(RGB_spline) {
  t.test(Area ~ Condition, data = RGB_spline)$p.value
}

# Calculate p-values for each genotype and timepoint
p_values <- RGB_spline %>%
  group_by(Day, Specie) %>%
  summarise(p_value = get_p_value(cur_data())) %>%
  ungroup()

## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.

# Display the p-values in a table
p_values$LOD <- -log10(p_values$p_value)
p_values

RGB_pplot <- ggplot(p_values, aes(x = Day, y = LOD)) 
RGB_pplot <- RGB_pplot + geom_line() + geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "red") 
RGB_pplot <- RGB_pplot + facet_wrap(~ Specie, ncol = 8)
RGB_pplot <- RGB_pplot + labs(x = "Days Of Experiment", y = "-log10(p-value)") 
RGB_pplot <- RGB_pplot + theme_minimal() + theme_bw() + theme(strip.text = element_blank(), strip.background = element_blank()) 
RGB_pplot

Now let’s add these two graphs together

library(cowplot)
RGB_plot <- plot_grid(RGB_sgraph2, RGB_pplot, rel_heights = c(4,1), ncol = 1, align = "v", axis = "l")
RGB_plot

OK - let’s save it into PDF before I forget:

pdf("RGB_SmoothSplines_per_species_with_pvalues.pdf", height = 6, width = 12)
plot(RGB_plot)
dev.off()

## quartz_off_screen 
##                 2

Growth rate calculations

OK - now let’s calculate growth rate for each of the plants that we have imaged:

temp <- subset(RGB_spline, RGB_spline$Pots == unique(RGB_spline$Pots[1]))
temp$Area <- as.numeric(temp$Area)
temp$Day <- as.numeric(temp$Day)
# plot the data
plot(temp$Area ~ temp$Day) +
# Linear regression line is added to the scatterplot using the lm() function
abline(lm(temp$Area ~ temp$Day))

## integer(0)

this looks as if we have some kind of slowing down happening (probably due to occlusions) - let’s calculate the growth rate based on first 6 days - where we have a clear linear growth rate then:

temp <- subset(RGB_spline, RGB_spline$Pots == unique(RGB_spline$Pots[1]))
temp$Area <- as.numeric(temp$Area)
temp$Day <- as.numeric(temp$Day)
temp <- subset(temp, temp$Day < 6)
# plot the data
plot(temp$Area ~ temp$Day) +
# Linear regression line is added to the scatterplot using the lm() function
abline(lm(temp$Area ~ temp$Day))

## integer(0)

OK - looks much better fit to linear function. Let’s put the values of the lm model into a table

# Fit a linear regression model to predict 'length' based on 'hours' in the 'temp' data frame
model <- lm(temp$Area ~ temp$Day)
model

## 
## Call:
## lm(formula = temp$Area ~ temp$Day)
## 
## Coefficients:
## (Intercept)     temp$Day  
##      160975        17316

GR <- model$coefficients[2]
GR

## temp$Day 
## 17315.99

R2 <- summary(model)$r.squared
R2

## [1] 0.9913218

names <- c(text = "Pots", "Condition", "Species", "GR", "R2")

# Create an empty data frame 'growth_data' to store growth-related data
growth_data <- data.frame()

# Loop through each element 'k' in the 'names' vector
for (k in names) {
  # Create a new column in 'growth_data' with the name specified by 'k' and set as character type
  growth_data[[k]] <- as.character()
}
temp

# Assign the 'ID' value from the 'temp' data frame to the first row, first column of 'growth_data'
growth_data[1, 1] <- temp$Pots[1]

# Assign the 'Accession' value from the 'temp' data frame to the first row, second column of 'growth_data'
growth_data[1, 2] <- as.character(temp$Condition[1])

# Assign the 'Treatment' value from the 'temp' data frame to the first row, third column of 'growth_data'
growth_data[1, 3] <- temp$Specie[1]

# Assign the growth rate (GR) to the first row, fourth column of 'growth_data'
growth_data[1, 4] <- GR

# Assign the coefficient of determination (R-squared, R2) to the first row, fifth column of 'growth_data'
growth_data[1, 5] <- R2

growth_data

# Create vector 'all_plants' containing unique 'ID' values from 'final_spline'
all_plants <- unique(RGB_spline$Pots)

# Loop through each unique 'ID' in 'all_plants'
for (i in 1:length(all_plants)) {
  temp <- subset(RGB_spline, RGB_spline$Pots == all_plants[i])
  temp$Area <- as.numeric(temp$Area)
  temp$Day <- as.numeric(temp$Day)
  temp <- subset(temp, temp$Day < 6)
  # Fit a linear regression model to predict 'Area' based on 'day'
  model <- lm(temp$Area ~ temp$Day)

  # Extract the growth rate (GR) coefficient from the linear regression model
  GR <- model$coefficients[2]

  # Calculate the coefficient of determination (R-squared, R2) for the model
  R2 <- summary(model)$r.squared

  # Populate 'growth_data' with relevant information for the current 'ID'
  growth_data[i, 1] <- temp$Pots[1]
  growth_data[i, 2] <-  as.character(temp$Condition[1])
  growth_data[i, 3] <- temp$Specie[1]
  growth_data[i, 4] <- GR
  growth_data[i, 5] <- R2
}

growth_data

Let’s fo a little clean up:

# Convert the 'GR' and 'R2' columns in 'growth_data' to numeric
growth_data$GR <- as.numeric(as.character(growth_data$GR))
growth_data$R2 <- as.numeric(as.character(growth_data$R2))

growth_data_clean <- subset(growth_data, growth_data$R2 > 0.7)
growth_data_clean <- subset(growth_data_clean, growth_data_clean$GR > 0)
growth_data_clean

Summary stats Growth rate plotting

GR_overall <- ggerrorplot(growth_data_clean, x = "Species", y = "GR", color = "Condition", 
                          fill = "Condition", facet.by = "Condition",
                          desc_stat = "mean_sd", add = "jitter", ncol = 2,
                          xlab="", ylab= "Growth Rate (pixel / hour)", add.params = list(color = "darkgray")) +
                          scale_color_d3("category10") 
GR_overall <- GR_overall + stat_compare_means(method = "aov", label.y = 180000)
GR_overall <- GR_overall + rremove("legend") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
GR_overall

OK - now let’s calculate the Stress Tolerance Indices for each accession based on this data:

growth_data_clean

GR_sum <- summaryBy(GR  ~ Species + Condition, data = growth_data_clean)
colnames(GR_sum)[3] <- "GR"
GR_sum

csGR <- dcast(GR_sum, Species ~ Condition)

## Using GR as value column: use value.var to override.

csGR

Great - now that we have the data in the right format - let’s calculate two most interesting STIs

csGR$STI_GR <- csGR$Drought / csGR$Control
csGR$STI2_GR <- csGR$Drought / sqrt(csGR$Control)
csGR

GR_STI <- ggdotchart(csGR, x = "Species", y = "STI_GR",
           label = round(csGR$STI_GR,2),   
           ylab = "Relative GR (fraction of Control)", xlab = "",
           sorting = "ascending",                        # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           ggtheme = theme_pubr()                        # ggplot2 theme
           )
GR_STI

scatter_GR_STI1 <- ggscatter(csGR, x = "Control", y = "Drought", 
                              color = "STI_GR",
                              ylab = "GR at Drought", xlab = "GR at Control"
                              ) + gradient_color(c("blue", "red"))
scatter_GR_STI1

GR_STI2 <- ggdotchart(csGR, x = "Species", y = "STI2_GR",
           label = round(csGR$STI2_GR,2),   
           ylab = "STI2 GR (a.u.)", xlab = "",
           sorting = "ascending",                        # Sort value in descending order
           add = "segments",                             # Add segments from y = 0 to dots
           ggtheme = theme_pubr()                        # ggplot2 theme
           )
GR_STI2

scatter_GR_STI2 <- ggscatter(csGR, x = "Control", y = "Drought", 
                              color = "STI2_GR",
                              ylab = "GR at Drought", xlab = "GR at Control"
                              ) + gradient_color(c("blue", "red"))
scatter_GR_STI2

pdf("GR_Summary_per_Accession.pdf", width = 7, height = 14)
mid_panel <- plot_grid(GR_STI, scatter_GR_STI1, ncol = 2, align = "h")
low_panel <- plot_grid(GR_STI2, scatter_GR_STI2, ncol = 2, align = "h")
plot_grid(GR_overall, mid_panel, low_panel,  ncol = 1, labels = "AUTO")
dev.off()

## quartz_off_screen 
##                 2

Now - let’s add growth data to the summary data too:

colnames(csGR)[2] <- "GR.C"
colnames(csGR)[3] <- "GR.D"
colnames(csGR)[4] <- "GR.STI1"
colnames(csGR)[5] <- "GR.STI2"
csGR

sum_data

sum_data2 <- merge(sum_data, csGR, by = "Species")
sum_data2

Let’s move on to PS data for now.

PS2 data

PS

OK - so for this I would like to first see how it looks throughout time - so I need to add Day rather than Date

unique(PS$Date)

## [1] 20250514 20250519 20250523

OK - so 14th is Day 2 - 19th is Day 7 - and 23rd is Day 11. Let’s add that info into the table:

PS$Day <- PS$Date
PS$Day <- gsub("20250514", 2, PS$Day)
PS$Day <- gsub("20250519", 7, PS$Day)
PS$Day <- gsub("20250523", 11, PS$Day)
PS

Let’s remove unnecessary info:

PS1 <- PS[,c(1, 22, 4:5,9:10, 12:21)]
PS1$Condition <- gsub("Normal", "Control", PS1$Condition)
PS1$Day <- as.factor(as.numeric(PS1$Day))
PS1

Cool - let’s now have a look at how individual traits change throughout time and drought exposure:

FvFm_lgraph <- ggplot(data = PS1, aes(x = Day, y = Fv.Fm, group = PlantID, color = Condition)) 
FvFm_lgraph <- FvFm_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
FvFm_lgraph <- FvFm_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
FvFm_lgraph <- FvFm_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
FvFm_lgraph <- FvFm_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
FvFm_lgraph <- FvFm_lgraph + labs(x = "Days of Experiment", y = "Max Quantum Yield (Fv/Fm)") + scale_color_d3("category10") + theme(legend.position = "top")
FvFm_lgraph

FqFm_lgraph <- ggplot(data = PS1, aes(x = Day, y = Fq..Fm., group = PlantID, color = Condition)) 
FqFm_lgraph <- FqFm_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
FqFm_lgraph <- FqFm_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
FqFm_lgraph <- FqFm_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
FqFm_lgraph <- FqFm_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
FqFm_lgraph <- FqFm_lgraph + labs(x = "Days of Experiment", y = "Quantum Yield (Fv'/Fm')") + scale_color_d3("category10") + theme(legend.position = "top")
FqFm_lgraph

qP_lgraph <- ggplot(data = PS1, aes(x = Day, y = qP, group = PlantID, color = Condition)) 
qP_lgraph <- qP_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qP_lgraph <- qP_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
qP_lgraph <- qP_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qP_lgraph <- qP_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
qP_lgraph <- qP_lgraph + labs(x = "Days of Experiment", y = "Reaction Open Centers / photochemical quenching puddle mode (qP)") + scale_color_d3("category10") + theme(legend.position = "top")
qP_lgraph

qN_lgraph <- ggplot(data = PS1, aes(x = Day, y = qN, group = PlantID, color = Condition)) 
qN_lgraph <- qN_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qN_lgraph <- qN_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
qN_lgraph <- qN_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qN_lgraph <- qN_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
qN_lgraph <- qN_lgraph + labs(x = "Days of Experiment", y = "Non-photochemical Quenching (qN)") + scale_color_d3("category10") + theme(legend.position = "top")
qN_lgraph

qL_lgraph <- ggplot(data = PS1, aes(x = Day, y = qL, group = PlantID, color = Condition)) 
qL_lgraph <- qL_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qL_lgraph <- qL_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
qL_lgraph <- qL_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qL_lgraph <- qL_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
qL_lgraph <- qL_lgraph + labs(x = "Days of Experiment", y = "Reaction Open Centers / photochemical quenching lake mode (qL)") + scale_color_d3("category10") + theme(legend.position = "top")
qL_lgraph

qI_lgraph <- ggplot(data = PS1, aes(x = Day, y = qI, group = PlantID, color = Condition)) 
qI_lgraph <- qI_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
qI_lgraph <- qI_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
qI_lgraph <- qI_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
qI_lgraph <- qI_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed") 
qI_lgraph <- qI_lgraph + labs(x = "Days of Experiment", y = "slow relaxing NPQ (qI)") + scale_color_d3("category10") + theme(legend.position = "top")
qI_lgraph

Dno_lgraph <- ggplot(data = PS1, aes(x = Day, y = D.no, group = PlantID, color = Condition)) 
Dno_lgraph <- Dno_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
Dno_lgraph <- Dno_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
Dno_lgraph <- Dno_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Dno_lgraph <- Dno_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
Dno_lgraph <- Dno_lgraph + labs(x = "Days of Experiment", y = "QY of non-regulated energy dissipation (phiNO)") + scale_color_d3("category10") + theme(legend.position = "top")
Dno_lgraph

Dnpq_lgraph <- ggplot(data = PS1, aes(x = Day, y = D.npq, group = PlantID, color = Condition)) 
Dnpq_lgraph <- Dnpq_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
Dnpq_lgraph <- Dnpq_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
Dnpq_lgraph <- Dnpq_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
Dnpq_lgraph <- Dnpq_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
Dnpq_lgraph <- Dnpq_lgraph + labs(x = "Days of Experiment", y = "QY of NPQ (phiNPQ)") + scale_color_d3("category10") + theme(legend.position = "top")
Dnpq_lgraph

npqt_lgraph <- ggplot(data = PS1, aes(x = Day, y = npq.t., group = PlantID, color = Condition)) 
npqt_lgraph <- npqt_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
npqt_lgraph <- npqt_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
npqt_lgraph <- npqt_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
npqt_lgraph <- npqt_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
npqt_lgraph <- npqt_lgraph + labs(x = "Days of Experiment", y = "Fast NPQ (npq(t))") + scale_color_d3("category10") + theme(legend.position = "top")
npqt_lgraph

ChlIdx_lgraph <- ggplot(data = PS1, aes(x = Day, y = ChlIdx, group = PlantID, color = Condition)) 
ChlIdx_lgraph <- ChlIdx_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
ChlIdx_lgraph <- ChlIdx_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
ChlIdx_lgraph <- ChlIdx_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
ChlIdx_lgraph <- ChlIdx_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
ChlIdx_lgraph <- ChlIdx_lgraph + labs(x = "Days of Experiment", y = "Chlorophyll Index (ChlIdx)") + scale_color_d3("category10") + theme(legend.position = "top")
ChlIdx_lgraph

AriIdx_lgraph <- ggplot(data = PS1, aes(x = Day, y = AriIdx, group = PlantID, color = Condition)) 
AriIdx_lgraph <- AriIdx_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
AriIdx_lgraph <- AriIdx_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
AriIdx_lgraph <- AriIdx_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
AriIdx_lgraph <- AriIdx_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
AriIdx_lgraph <- AriIdx_lgraph + labs(x = "Days of Experiment", y = "Anthocyanin Index (AriIdx)") + scale_color_d3("category10") + theme(legend.position = "top")
AriIdx_lgraph

NDVI_lgraph <- ggplot(data = PS1, aes(x = Day, y = NDVI, group = PlantID, color = Condition)) 
NDVI_lgraph <- NDVI_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
NDVI_lgraph <- NDVI_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
NDVI_lgraph <- NDVI_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
NDVI_lgraph <- NDVI_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
NDVI_lgraph <- NDVI_lgraph + labs(x = "Days of Experiment", y = "Normalized Difference Vegetation Index (NDVI)") + scale_color_d3("category10") + theme(legend.position = "top")
NDVI_lgraph

npqt_lgraph <- ggplot(data = PS1, aes(x = Day, y = npq.t., group = PlantID, color = Condition)) 
npqt_lgraph <- npqt_lgraph + geom_line(alpha = 0.1) + facet_grid(~Specie)
npqt_lgraph <- npqt_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) 
npqt_lgraph <- npqt_lgraph + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = F)
npqt_lgraph <- npqt_lgraph + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "dashed")
npqt_lgraph <- npqt_lgraph + labs(x = "Days of Experiment", y = "Fast NPQ (npq(t))") + scale_color_d3("category10") + theme(legend.position = "top")
npqt_lgraph

OK let’s save this into a file:

pdf("PhenoSight_Photosynthesis_per_Species.pdf", width = 12, height = 10)
plot_grid(FvFm_lgraph,
          FqFm_lgraph,
          npqt_lgraph, ncol = 1, labels = "AUTO"
          )
dev.off()

## quartz_off_screen 
##                 2

pdf("PhenoSight_Pigments_per_Species.pdf", width = 12, height = 10)
plot_grid(ChlIdx_lgraph,
          AriIdx_lgraph,
          NDVI_lgraph, ncol = 1, labels = "AUTO")
dev.off()

## quartz_off_screen 
##                 2

OK - so let’s add all of the info to summary file as well - but first let’s reshape it and summarize the data per species:

PS1

sPS1 <- summaryBy(Fv.Fm + Fq..Fm. + qP + qN + qL + qI + D.no + D.npq + npq.t. + ChlIdx + AriIdx + NDVI ~ Specie + Condition + Day, data = PS1)
sPS1

Now - I d like to have all values per day and conditions too:

msPS1 <- melt(sPS1, id = c("Specie", "Condition", "Day"))
msPS1$variable <- gsub(".mean", "", msPS1$variable)
msPS1

now let’s cast it:

csPS1 <- dcast(msPS1, Specie ~ variable + Condition + Day)
csPS1

Are some of these traits correlated? Let’s see:

library(ggcorrplot)
mPS1 <- as.matrix(csPS1[2:73])
corr <- cor(mPS1)
head(corr[, 1:6])

##                   AriIdx_Control_2 AriIdx_Control_7 AriIdx_Control_11
## AriIdx_Control_2         1.0000000        0.5316530         0.9231154
## AriIdx_Control_7         0.5316530        1.0000000         0.7873112
## AriIdx_Control_11        0.9231154        0.7873112         1.0000000
## AriIdx_Drought_2         0.9603381        0.5719991         0.9336097
## AriIdx_Drought_7         0.6508832        0.7130012         0.7533033
## AriIdx_Drought_11        0.9632113        0.6770105         0.9559352
##                   AriIdx_Drought_2 AriIdx_Drought_7 AriIdx_Drought_11
## AriIdx_Control_2         0.9603381        0.6508832         0.9632113
## AriIdx_Control_7         0.5719991        0.7130012         0.6770105
## AriIdx_Control_11        0.9336097        0.7533033         0.9559352
## AriIdx_Drought_2         1.0000000        0.7185075         0.9247256
## AriIdx_Drought_7         0.7185075        1.0000000         0.6640674
## AriIdx_Drought_11        0.9247256        0.6640674         1.0000000

p.mat <- cor_pmat(mPS1)

pdf("PhenoSight_Correlation.pdf", width = 18, height = 18)
ggcorrplot(
  corr,
  p.mat = p.mat,
  hc.order = FALSE,
  type = "lower",
  insig = "blank"
)
dev.off()

## quartz_off_screen 
##                 2

now let’s calculate the STIs for the few most interesting traits:

AriIdx npq(t) Fv/Fm Fq/Fm

csPS1$AriIdx_STI_2 <-csPS1$AriIdx_Drought_2 / csPS1$AriIdx_Control_2
csPS1$AriIdx_STI_7 <-csPS1$AriIdx_Drought_7 / csPS1$AriIdx_Control_7
csPS1$AriIdx_STI_11 <-csPS1$AriIdx_Drought_11 / csPS1$AriIdx_Control_11

csPS1$npqt_STI_2 <-csPS1$npq.t._Drought_2 / csPS1$npq.t._Control_2
csPS1$npqt_STI_7 <-csPS1$npq.t._Drought_7 / csPS1$npq.t._Control_7
csPS1$npqt_STI_11 <-csPS1$npq.t._Drought_11 / csPS1$npq.t._Control_11

csPS1$FvFm_STI_2 <-csPS1$Fv.Fm_Drought_2 / csPS1$Fv.Fm_Control_2
csPS1$FvFm_STI_7 <-csPS1$Fv.Fm_Drought_7 / csPS1$Fv.Fm_Control_7
csPS1$FvFm_STI_11 <-csPS1$Fv.Fm_Drought_11 / csPS1$Fv.Fm_Control_11

csPS1$FqFm_STI_2 <-csPS1$Fq..Fm._Drought_2 / csPS1$Fq..Fm._Control_2
csPS1$FqFm_STI_7 <-csPS1$Fq..Fm._Drought_7 / csPS1$Fq..Fm._Control_7
csPS1$FqFm_STI_11 <-csPS1$Fq..Fm._Drought_11 / csPS1$Fq..Fm._Control_11

csPS1

OK - now let’s have a look at how STI for AriIdx are changing throughout the experiment for each genotype studied:

temp <- csPS1[,c(1,74:85)]
temp

temp2 <- melt(temp, id = "Specie")
temp2$Day <- temp2$variable
temp2$Day <- gsub("AriIdx_STI_","", temp2$Day)
temp2$Day <- gsub("npqt_STI_","", temp2$Day)
temp2$Day <- gsub("FvFm_STI_","", temp2$Day)
temp2$Day <- gsub("FqFm_STI_","", temp2$Day)
temp2$Day <- as.numeric(temp2$Day)
temp2$variable <- gsub("_2","", temp2$variable)
temp2$variable <- gsub("_7","", temp2$variable)
temp2$variable <- gsub("_11","", temp2$variable)
temp2

library(tidyverse)

## Warning: package 'purrr' was built under R version 4.3.3

## Warning: package 'lubridate' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard()    masks scales::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ doBy::order_by()    masks dplyr::order_by()
## ✖ lubridate::stamp()  masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

STI_graph <- ggerrorplot(temp2, x = "Specie", y = "value", color = "Specie", fill = "Specie",
                          desc_stat = "mean_sd", add = "jitter", ncol = 2, facet.by = "variable",
                          xlab="", ylab= "STI (D/C)", add.params = list(color = "darkgray"))  
STI_graph <- STI_graph + color_palette("aaas") + stat_compare_means(method = "aov") + ylim(0, 1.5)
STI_graph <- STI_graph + rremove("legend") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
STI_graph

Let’s save it to show that for these traits the variation in STI is not as big as for EVT or GR traits:

pdf("PhenoSight_STI.pdf", width = 10, height = 10)
plot(STI_graph)
dev.off()

## quartz_off_screen 
##                 2

OK - let’s reduce the PS data to just few relevant values for now:

PS_sum <- summaryBy(value ~ Specie + variable, data = temp2)
PS_sum2 <- dcast(PS_sum, Specie ~ variable)

## Using value.mean as value column: use value.var to override.

colnames(PS_sum2)[1] <- "Species"
unique(PS_sum2$Species)

## [1] "S. cheesmaniae"         "S. corneliomulleri"     "S. habrochaites"       
## [4] "S. lycopersicoides"     "S. lycopersicoides (B)" "S. neorickii"          
## [7] "S. pimpinellifolium"    "S. sitiens"

PS_sum2$Species <- gsub("S. cheesmaniae", "S. cheesemaniae", PS_sum2$Species)
PS_sum2$Species <- gsub("S. corneliomulleri", "S. corneliomulerii", PS_sum2$Species)
PS_sum2

now - let’s merge it with other summary data from EVT and GR:

sum_data3 <- merge(sum_data2, PS_sum2, by = "Species", all = T)
sum_data3

Accession ranking example

NOW - how do we decide what species is tolerant and what is susceptible to drought:

sum_matrix <- as.matrix(sum_data3[,c(6,10:12)])
rownames(sum_matrix) <- sum_data3$Species
heatmap(sum_matrix, scale = "column")

library("pheatmap")

## Warning: package 'pheatmap' was built under R version 4.3.3

pdf("Accession_ranking_STI_based_three_groups.pdf")
pheatmap(sum_matrix, scale = "column", cutree_rows = 3)

dev.off()

## pdf 
##   3

FFAR Pilot experiment - analysis of all data