Overview

This notebook combines the data analysis from two experiments - cowpea and tepary pilot experiments performed by Hayley Sussman and Aparna Srinivasan. The experiments were first analyzed by the respective scientists, and this notebook combines this analysis into one, as a preparation of data for the figure. The analysis was performed by Magdalena Julkowska.

###Load necessary libraries

library(ggplot2)
library(cowplot)
library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(ggsci)
library(ggbeeswarm)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

library(reshape)

## 
## Attaching package: 'reshape'

## The following objects are masked from 'package:reshape2':
## 
##     colsplit, melt, recast

## The following object is masked from 'package:plotly':
## 
##     rename

## The following object is masked from 'package:dplyr':
## 
##     rename

## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths

## The following object is masked from 'package:cowplot':
## 
##     stamp

library(multcompView)
library(doBy)

## 
## Attaching package: 'doBy'

## The following object is masked from 'package:dplyr':
## 
##     order_by

library(readxl)

Cowpea experiment

The original experiment analysis was performed by Hayley Sussman on 2022/01/06 to examine and fix the side view image analysis for the Fall 2021 Cowpea experiment. When Li’ang initially made the graphs, many pots’ leaf area fluctuated, so we had to scan through the images to find irregularities. What was found: remove the following time stamps (details in LabArchives): 2021.10.22-13-15.22.11 (empty images) 2021.10.13-15.46.26 (hand in image) 2021.10.20-14.33.44 (repeat of pot #51) 2021.10.22-13.40.34 (empty images) Two pots were skipped on two different days, so I want to make sure the timestamps are correctly aligned: pot 99 should be 2021.10.20-12.00.36 pot 27 should be 2021.10.22-14.07.45

###Load and Organize Data

In Excel, I removed the necessary time stamps and added a column called “Pot.correct” to ensure all pot numbers are correctly aligned

d11 <- read.csv("2021.10.11.clean.csv")
d13 <- read.csv("2021.10.13.clean_2.csv")
d15 <- read.csv("2021.10.15.clean.csv")
d18 <- read.csv("2021.10.18.clean.csv")
d20 <- read.csv("2021.10.20.clean_2.csv")
d22 <- read.csv("2021.10.22.clean_2.csv")
d25 <- read.csv("2021.10.25.clean.csv")
pot_geno <- read.csv("cowpea_pot_geno_HS_new.csv")
Weight <- read.csv("final_weight_metatable.csv")
Map <- Weight[,c(1:3)]
Map

#merge all files with rbind
all_days <- rbind(d11, d13, d15, d18, d20, d22, d25)
all_days

#merge with pot number/geno file
# combined_days_geno <- merge(all_days, pot_geno, all = TRUE)
# combined_days_geno

# split column X, which contains the remainder of the timestamp that wasn't split off in excel
all_days <- all_days %>% separate(X, c( "sec","Raspi","side","camera"))
all_days

#order by "year"-"month"-"date"-"hour"-"min"-"sec"-"side"
all_days <- all_days[
  order(all_days[,"year"], all_days[,"month"], all_days[,"date"], all_days[,"hour"], all_days[,"min"], all_days[,"sec"], all_days[,"side"]),]
all_days

#Build a consensus index 
all_days$index <- paste0(all_days$year, ".", all_days$month,".", all_days$date, "-", all_days$hour, ".", all_days$min, ".", all_days$sec)
Raspi_clean <- all_days[,c("area","year", "month", "date","hour","min", "sec","index")]
Raspi_clean

write.csv(Raspi_clean, "Raspi_clean.csv", row.names = F)

###Group images based on consensus time stamp

### Calculate the total area/mean area for each time stamp
Raspi_clean <- Raspi_clean %>% group_by(index) %>% mutate(side.counts = n())
Raspi_clean <- Raspi_clean %>% group_by(index) %>% mutate(area.mean = ave(area))
Raspi_clean <- Raspi_clean %>% group_by(index) %>% mutate(area.total = sum(area))
#nrow(Raspi_clean)
Raspi_unique <- unique(Raspi_clean[,c("month", "date", "index","side.counts","area.mean","area.total")])
Raspi_unique

write.csv(Raspi_unique, "raspi_clean_combined.csv", row.names = F)

#Merge map with image data

Date_list <- unique(na.omit(Raspi_unique$date))

# day 13
Map_new <- subset(Map, Map$POT != 22)
dim(Map_new)

## [1] 104   3

#day 20
Map_new2 <- subset(Map, Map$POT != 98)
#day 22
Map_new3 <- subset(Map, Map$POT != 26)

# day 11
i=1
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- sum.df

# day 13
i=2
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map_new)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)
data_all

# day 15
i=3
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 18
i=4
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 20
i=5
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map_new2)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 22
i=6
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map_new3)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 25
i=7
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)
data_all

Raspi_summary <- data_all
Raspi_summary

write.csv(Raspi_summary,"all_summary.csv", row.names = F, quote = F )

#Change date to days after stress to align with other analysis
Raspi_summary$date <- gsub("11", "-3", Raspi_summary$date)
Raspi_summary$date <- gsub("13", "-1", Raspi_summary$date)
Raspi_summary$date <- gsub("15", "1", Raspi_summary$date)
Raspi_summary$date <- gsub("18", "4", Raspi_summary$date)
Raspi_summary$date <- gsub("20", "7", Raspi_summary$date)
Raspi_summary$date <- gsub("22", "9", Raspi_summary$date)
Raspi_summary$date <- gsub("25", "12", Raspi_summary$date)
Raspi_summary$date <- factor(Raspi_summary$date, levels = c("-3", "-1", "1", "4", "7", "9", "12"))

#Edit the names of the genotypes that are incorrect
Raspi_summary$Genotype <- gsub("IT97IC", "IT97K-499", Raspi_summary$Genotype)
Raspi_summary$Genotype <- gsub("UCR7", "UCR779", Raspi_summary$Genotype)

Raspi_summary <- subset(Raspi_summary, Raspi_summary$Condition != "Salt")

Smoothinng the data using splines

Occasionally, the data from Raspberry Pi is a bit noisy - and we do not expect the Area to fluctuate. In order to smooth the data, we will apply smooth spline function, that will not only smooth the observed increase in shoot area, but also will provide us data points for additional days in between the measurements, allowing us to beter characterize when the significant difference in shoot area is occuring at higher spatial resolution.

days <- -3:12

temp <- subset(Raspi_summary, Raspi_summary$POT == unique(Raspi_summary$POT)[1])
temp$date <- as.numeric(as.character(temp$date))
temp$date

## [1] -3 -1  1  4  7  9 12

plot.spl <- with(temp, smooth.spline(date, area.total, df = 4))
plot(area.total ~ date, data = temp)
lines(plot.spl, col = "blue")
lines(predict(plot.spl, days), col="red")

# keep all of the sline-fitted data:

names <- c(text="POT", "genotype", "treatment", "day", "Area.SUM")
spline_data <- data.frame()
for (k in names) spline_data[[k]] <- as.character()
spline_data

pred_temp <- predict(plot.spl, days)
pred_temp

## $x
##  [1] -3 -2 -1  0  1  2  3  4  5  6  7  8  9 10 11 12
## 
## $y
##  [1]  660393.6  880551.2 1089640.2 1278325.8 1444322.9 1588527.1 1717509.0
##  [8] 1839258.1 1959807.2 2077364.4 2188181.7 2290884.4 2393592.4 2504465.6
## [15] 2622329.8 2743677.2

# make empty dataframe
spline_data[1:16,4] <- pred_temp$x
spline_data[1:16,5] <- pred_temp$y
spline_data[1:16,1] <- temp$POT[1]
spline_data[1:16,2] <- temp$Genotype[1]
spline_data[1:16,3] <- temp$Condition[1]

spline_data

final_spline <- spline_data
all_plants <- unique(Raspi_summary$POT)
length(all_plants)

## [1] 70

for(i in 2:70){
  temp <- subset(Raspi_summary, Raspi_summary$POT == all_plants[i])
  if(dim(temp)[1]>4){
    plot.spl <- with(temp, smooth.spline(date, area.total, df = 4))
    pred_temp <- predict(plot.spl, days)
    spline_data[1:16,4] <- pred_temp$x
    spline_data[1:16,5] <- pred_temp$y
    spline_data[1:16,1] <- temp$POT[1]
    spline_data[1:16,2] <- temp$Genotype[1]
    spline_data[1:16,3] <- temp$Condition[1]
    final_spline <- rbind(final_spline, spline_data)
  }
  else{
    spline_data[1:16,4] <- days
    spline_data[1:16,5] <- 0
    spline_data[1:16,1] <- temp$POT[1]
    spline_data[1:16,2] <- temp$Genotype[1]
    spline_data[1:16,3] <- temp$Condition[1]
  }
}

final_spline

###plot spline smoothed leaf area across all reps per genotype

final_spline$Area.SUM <- as.numeric(as.character(final_spline$Area.SUM))
final_spline$day <- factor(final_spline$day, levels = c("-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
bye <- c(45, 70, 35, 15, 12, 25, 21, 64, 55, 44, 69, 53, 52)
final_spline <- subset(final_spline, !(final_spline$POT %in% bye))

control_v_drought_area <- ggplot(data = final_spline, aes(x = day, y = Area.SUM, color = treatment, group = POT)) + theme_classic()
control_v_drought_area <- control_v_drought_area + geom_line(alpha = 0.1)
control_v_drought_area <- control_v_drought_area + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = treatment), alpha = 0.3)
control_v_drought_area <- control_v_drought_area + stat_summary(fun = mean, aes(group = treatment), size = 0.9, geom = "line", linetype = "solid")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

control_v_drought_area <- control_v_drought_area + stat_compare_means(aes(group = treatment), label = "p.signif", method = "t.test", hide.ns = T)
control_v_drought_area <- control_v_drought_area + scale_color_aaas()  
control_v_drought_area <- control_v_drought_area + xlab("Days After Stress") + ylab("Shoot Area (pixels)") + theme(legend.position="top")
control_v_drought_area

## Warning: The dot-dot notation (`..p.signif..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(p.signif)` instead.

ggplotly(control_v_drought_area)

Growth rate calculations

We need to fit growth one plant at the time:

#Subset data to only include days that are after stress (positive)
final_spline

DAS <- subset(final_spline, final_spline$day != -3)
DAS <- subset(DAS, DAS$day != -2)
After_stress_only <- subset(DAS, DAS$day != -1)
temp <- subset(After_stress_only, After_stress_only$POT == 1)
temp

plot(temp$Area.SUM ~as.numeric(as.character(temp$day))) + abline(lm(temp$Area.SUM ~ as.numeric(as.character(temp$day))))

## integer(0)

model <- lm(temp$Area.SUM ~ as.numeric(as.character(temp$day)))
summary(model)

## 
## Call:
## lm(formula = temp$Area.SUM ~ as.numeric(as.character(temp$day)))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -62558 -15169   4692  22135  28106 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         1340884      14478   92.61  < 2e-16 ***
## as.numeric(as.character(temp$day))   118164       2048   57.71 5.22e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27620 on 11 degrees of freedom
## Multiple R-squared:  0.9967, Adjusted R-squared:  0.9964 
## F-statistic:  3331 on 1 and 11 DF,  p-value: 5.223e-15

summary(model)$coefficients[2]

## [1] 118163.6

So now - we need to assign this value to a POT # 01 - let’s repurpose Map for this:

Map$GrowthRate <- 0
Map$GrowthRate[1] <- summary(model)$coefficients[2]
Map <- subset(Map, Map$Condition != "Salt")

Cool - let’s loop it!

for (i in 1:70){
  temp <- subset(After_stress_only, After_stress_only$POT == i)
  if(dim(temp)[1]>0){
    model <- lm(temp$Area.SUM ~ as.numeric(as.character(temp$day)))
    Map$GrowthRate[i] <- summary(model)$coefficients[2]  
  }else{
    Map$GrowthRate[i] <- 0
  }
  
}

dim(temp)

## [1] 0 5

Map$Genotype <- gsub("UCR7", "UCR779", Map$Genotype)
Map$Genotype <- gsub("IT97IC", "IT97K-499", Map$Genotype)
Map <- subset(Map, Map$GrowthRate > 0)

library(ggbeeswarm)
Map$Genotype <- gsub("-", ".", Map$Genotype)

Growth_CP_plot <- ggplot(data = Map, mapping = aes(x = Genotype, y = GrowthRate, colour = Genotype)) 
Growth_CP_plot <- Growth_CP_plot + geom_beeswarm(alpha=0.6, priority = "density")
Growth_CP_plot <- Growth_CP_plot + facet_wrap(~ Condition) + theme_classic()
Growth_CP_plot <- Growth_CP_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")

## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.

Growth_CP_plot <- Growth_CP_plot + theme(legend.position = "none")
Growth_CP_plot <- Growth_CP_plot + xlab("") + ylab("Growth Rate (Pix / day)")
Growth_CP_plot <- Growth_CP_plot + theme(axis.text.x = element_text(angle=90, vjust=0.5))
Growth_CP_plot <- Growth_CP_plot + stat_compare_means(method = "aov") + scale_color_jco()  
Growth_CP_plot

This is great - but I d like to have Tukey HSD letters there too. Lets try something:

MapC <- subset(Map, Map$Condition == "Control")
MapD <- subset(Map, Map$Condition == "Drought")

aov(GrowthRate ~ Genotype, data = MapC)

## Call:
##    aov(formula = GrowthRate ~ Genotype, data = MapC)
## 
## Terms:
##                     Genotype    Residuals
## Sum of Squares  115012499491 128325868768
## Deg. of Freedom            4           25
## 
## Residual standard error: 71645.2
## Estimated effects may be unbalanced

Output <- TukeyHSD(aov(GrowthRate ~ Genotype, data = MapC))
P7 = Output$Genotype[,'p adj']
stat.test<- multcompLetters(P7)
stat.test

## IT97K.499     Sanzi  Suvita.2    UCR779     CB5.2 
##       "a"       "a"       "b"       "a"       "a"

test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
test$Condition <- "Control"
colnames(test)[1] <- "Tukey"
test

aov(GrowthRate ~ Genotype, data = MapD)

## Call:
##    aov(formula = GrowthRate ~ Genotype, data = MapD)
## 
## Terms:
##                    Genotype   Residuals
## Sum of Squares  29598072124 56946042694
## Deg. of Freedom           4          22
## 
## Residual standard error: 50876.88
## Estimated effects may be unbalanced

Output <- TukeyHSD(aov(GrowthRate ~ Genotype, data = MapD))
P7 = Output$Genotype[,'p adj']
stat.test<- multcompLetters(P7)
testD <- as.data.frame(stat.test$Letters)
testD$group1 <- rownames(testD)
testD$group2 <- rownames(testD)
testD$Condition <- "Drought"
colnames(testD)[1] <- "Tukey"
testD

test <- rbind(test, testD)
test

Growth_CP_plot <- ggplot(data = Map, mapping = aes(x = Genotype, y = GrowthRate, colour = Genotype)) 
Growth_CP_plot <- Growth_CP_plot + geom_beeswarm(alpha=0.6, priority = "density")
Growth_CP_plot <- Growth_CP_plot + facet_wrap(~ Condition) + theme_classic()
Growth_CP_plot <- Growth_CP_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
Growth_CP_plot <- Growth_CP_plot + theme(legend.position = "none")
Growth_CP_plot <- Growth_CP_plot + xlab("") + ylab("Growth Rate (Pix / day)")
Growth_CP_plot <- Growth_CP_plot + theme(axis.text.x = element_text(angle=90, vjust=0.5))
Growth_CP_plot <- Growth_CP_plot + stat_compare_means(method = "aov", label.y = 5.5e+5)
Growth_CP_plot <- Growth_CP_plot + stat_pvalue_manual(test, label = "Tukey", y.position = 5e+5) + scale_color_jco()
Growth_CP_plot

###Make Fresh weight vs. projected shoot area comparison

How well can we predict plant weight from 7 side view images?

#Combined two datasets for the last date of experiment
meta <- cbind(Weight, Raspi_unique[Raspi_unique$date == 25,])
meta2 <- subset(meta, meta$Condition != "Salt")

FW_Area_CP <- ggscatter(meta2, x = "area.total", y = "FW", color = "Condition", rug = TRUE) + stat_cor() + scale_color_aaas() + theme(legend.position = "none") + xlab("Shoot Area (pixels)") + ylab("Fresh Weigh (g)")
FW_Area_CP

Plot evapotranspiration rate

OK - last but not least - evapotranspiration! This data was collected over a different experiment, but using the same genotypes and slightly different time line.

EVT <- read.csv("2_20220516_Cowpea_Metabolomic_Drought.csv")
EVT$Pot <- 1:60
EVT

let’s plot it quickly:

library(reshape2)
EVTm <- melt(EVT, id = c("Treatment", "Genotype", "Pot"))
colnames(EVTm)[4] <- "day"
colnames(EVTm)[5] <- "EVT"
EVTm$day <- gsub("d", "", EVTm$day)
EVTm$day <- as.numeric(as.character(EVTm$day))
EVTm$day <- as.factor(EVTm$day)
EVTm

EVTm <- subset(EVTm, EVTm$EVT > 0)
EVTm <- subset(EVTm, EVTm$EVT < 120)
EVTm <- subset(EVTm, EVTm$day != 15)
EVTm <- subset(EVTm, EVTm$day != 14)
EVTm <- subset(EVTm, EVTm$day != 13)
EVTm <- subset(EVTm, EVTm$day != 10)

control_v_drought_evt <- ggplot(data = EVTm, aes(x = day, y = EVT, color = Treatment, group = Pot)) + theme_classic()
control_v_drought_evt <- control_v_drought_evt + geom_line(alpha = 0.1)
control_v_drought_evt <- control_v_drought_evt + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Treatment), alpha = 0.3)
control_v_drought_evt <- control_v_drought_evt + stat_summary(fun = mean, aes(group = Treatment), size = 0.9, geom = "line", linetype = "solid")
control_v_drought_evt <- control_v_drought_evt + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = T)
control_v_drought_evt <- control_v_drought_evt + scale_color_aaas()  
control_v_drought_evt <- control_v_drought_evt + xlab("Days After Stress") + ylab("Evapotranspiration") + theme(legend.position="top")
control_v_drought_evt

ok - now - let’s calculate median evapotranspiration per accession per treatment:

library(doBy)
unique(EVTm$Genotype)

## [1] "UCR779"    "Suvita"    "IT97K-499" "CB5-2"     "Sanzi"

EVTm$Genotype <- gsub("Suvita", "Suvita.2", EVTm$Genotype)
EVTs <- summaryBy(EVT ~ Genotype + Treatment + Pot, data = EVTm, FUN = function(x) c(med=median(x)))
EVTs$Genotype <- gsub("-", ".", EVTs$Genotype)
EVTs

unique(EVTs$Genotype)

## [1] "CB5.2"     "IT97K.499" "Sanzi"     "Suvita.2"  "UCR779"

let’s calculate Tukey HSD letters for each batch:

unique(EVTs$Treatment)

## [1] "Control" "Drought"

EVTc <- subset(EVTs, EVTs$Treatment == "Control")
EVTd <- subset(EVTs, EVTs$Treatment == "Drought")
unique(EVTc$Genotype)

## [1] "CB5.2"     "IT97K.499" "Sanzi"     "Suvita.2"  "UCR779"

unique(EVTd$Genotype)

## [1] "CB5.2"     "IT97K.499" "Sanzi"     "Suvita.2"  "UCR779"

Output <- TukeyHSD(aov(EVT.med ~ Genotype, data = EVTc))
P7 = Output$Genotype[,'p adj']
stat.test<- multcompLetters(P7)
testC <- as.data.frame(stat.test$Letters)
testC$group1 <- rownames(testD)
testC$group2 <- rownames(testD)
testC$Treatment <- "Control"
colnames(testC)[1] <- "Tukey"

Output <- TukeyHSD(aov(EVT.med ~ Genotype, data = EVTd))
P7 = Output$Genotype[,'p adj']
stat.test<- multcompLetters(P7)
testD <- as.data.frame(stat.test$Letters)
testD$group1 <- rownames(testD)
testD$group2 <- rownames(testD)
testD$Treatment <- "Drought"
colnames(testD)[1] <- "Tukey"

test <- rbind(testC, testD)

Evt_CP_plot <- ggplot(data = EVTs, mapping = aes(x = Genotype, y = EVT.med, colour = Genotype)) 
Evt_CP_plot <- Evt_CP_plot + geom_beeswarm(alpha=0.6, priority = "density")
Evt_CP_plot <- Evt_CP_plot + facet_wrap(~ Treatment) + theme_classic()
Evt_CP_plot <- Evt_CP_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
Evt_CP_plot <- Evt_CP_plot + theme(legend.position = "none")
Evt_CP_plot <- Evt_CP_plot + xlab("") + ylab("Evapotranspiration Rate (g H2O / day)")
Evt_CP_plot <- Evt_CP_plot + theme(axis.text.x = element_text(angle=90, vjust=0.5))
Evt_CP_plot <- Evt_CP_plot + stat_compare_means(method = "aov", label.y = 120)
Evt_CP_plot <- Evt_CP_plot + stat_pvalue_manual(test, label = "Tukey", y.position = 110) + scale_color_jco()
Evt_CP_plot

# Tepary bean experiment

This notebook describes the analysis of the experiment performed by Aparna Srinivasan on tepary beans grown under control (60% soil water holding capacity) and two drought conditions (10% and 5% SWHC).

Load and organize the data

d0 <- read.csv ("single_value_trait_All/Results_single_value_traits_041322.csv")
d2 <- read.csv("single_value_trait_All/Results_single_value_traits_041522.csv")
d4 <-read.csv("single_value_trait_All/Results_single_value_traits_041722.csv")
d6 <-read.csv("single_value_trait_All/Results_single_value_traits_041922.csv")
d8 <-read.csv("single_value_trait_All/Results_single_value_traits_042122.csv")
d10 <-read.csv("single_value_trait_All/Results_single_value_traits_042322.csv")
d12 <- read.csv ("single_value_trait_All/Results_single_value_traits_042522.csv")
d0

Removing 1st 15 empty columns and adding 31st column in front

d0 <- d0[,c(31, 16:30)]
d2 <- d2[,c(31, 16:30)]
d4 <- d4[,c(31, 16:30)]
d6 <- d6[,c(31, 16:30)]
d8 <- d8[,c(31, 16:30)]
d10 <- d10[,c(31, 16:30)]
d12 <- d12[,c(31, 16:30)]

Bind all days together

combining <- rbind(d0, d2, d4, d6, d8, d10, d12)
combining$info <- strsplit(combining$roi, split = "_")
combining$info[1][[1]][1]

## [1] "2022.04.13-10.07.53"

Side column and timestamp info isolated

for(i in 1:nrow(combining)){
  combining$side[i] <- combining$info[i][[1]][3]
  combining$timestamp[i] <- combining$info[i][[1]][1]
}
combining

Split the date and time and isolate both DATE and TIMESTAMP

combining$info2 <- strsplit(combining$timestamp, "-")
for(i in 1:nrow(combining)){
  combining$DATE[i] <- combining$info2[i][[1]][1]
  combining$timestamp[i] <- combining$info2[i][[1]][2]
}
combining

Selecting important variables for analysis

sel_variables <- combining[,c(21,19,18,4,8,9)]
sel_variables

calculate cumulative area from 7 different angles AND maximum width and height - Subset for one plant

today <- subset(sel_variables, sel_variables$DATE == unique(sel_variables$DATE)[1])
one_plant <- subset(today, today$timestamp == unique(today$timestamp)[1])
one_plant

AREA.sum will be the sum of ALL the areas in this dataset while max width and height will be the width.max and height.max

sum(one_plant$area)

## [1] 42569

max(one_plant$width)

## [1] 305

max(one_plant$height)

## [1] 342

Adding measurement to an empty table

names <- c("DATE", "TIME", "Area.sum", "Height.max", "Width.max")
data_sum <- data.frame()
for (k in names) data_sum[[k]] <- as.character()
data_sum[1,1] <- one_plant$DATE[1]
data_sum[1,2] <- one_plant$timestamp[1]
data_sum[1,3] <- sum(one_plant$area)
data_sum[1,4] <- max(one_plant$height)
data_sum[1,5] <- sum(one_plant$width)
data_sum

Loop through all the data

count <- 1
for(d in 1:length(unique(sel_variables$DATE))){
  today <- subset(sel_variables, sel_variables$DATE == unique(sel_variables$DATE)[d])
  for(t in 1:length(unique(today$timestamp))){
    one_plant <- subset(today, today$timestamp == unique(today$timestamp)[t])
    data_sum[count,1] <- one_plant$DATE[1]
    data_sum[count,2] <- one_plant$timestamp[1]
    data_sum[count,3] <- sum(one_plant$area)
    data_sum[count,4] <- max(one_plant$height)
    data_sum[count,5] <- sum(one_plant$width)
    count <- count + 1
  }
}

data_sum

dim(sel_variables)[1] / dim(data_sum)[1]

## [1] 7

data_sum$Area.sum <- as.numeric(as.character(data_sum$Area.sum))
min(data_sum$Area.sum)

## [1] 8610

unique(data_sum$DATE)

## [1] "2022.04.13" "2022.04.15" "2022.04.17" "2022.04.19" "2022.04.21"
## [6] "2022.04.23" "2022.04.25"

data_sum$DATE <- gsub("2022.04.", "", data_sum$DATE)
data_sum$DATE <- as.numeric(as.character(data_sum$DATE))
data_sum

plot(data_sum$Area.sum ~ data_sum$DATE)

Final data with pot no, genotype and treatment, matched with processed image_loading

cumm_data <- read_excel("Image_alldays_confirmed_pot_April2022.xlsx")
head(cumm_data)

drop rows with missing values

cumm_data_clean <- cumm_data %>% drop_na()
cumm_data_clean$Treatment <- factor(cumm_data_clean$Treatment, levels = c("Control","10% WHC", "5% WHC"))
cumm_data_clean <- subset(cumm_data_clean, cumm_data_clean$Treatment != "5% WHC")
cumm_data_clean$Treatment <- gsub("10% WHC", "Drought", cumm_data_clean$Treatment)

plot data:

cumm_data_clean$Days <- as.factor(cumm_data_clean$Days)
Tep_c_v_d_raw <- ggplot(data = cumm_data_clean, aes(x = Days, y = Area.sum, color = Treatment, group = PotNo)) + theme_classic()
Tep_c_v_d_raw <- Tep_c_v_d_raw + geom_line(alpha = 0.1)
Tep_c_v_d_raw <- Tep_c_v_d_raw + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Treatment), alpha = 0.3)
Tep_c_v_d_raw <- Tep_c_v_d_raw + stat_summary(fun = mean, aes(group = Treatment), size = 0.9, geom = "line", linetype = "solid")
Tep_c_v_d_raw <- Tep_c_v_d_raw + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = T)
Tep_c_v_d_raw <- Tep_c_v_d_raw + scale_color_aaas()  
Tep_c_v_d_raw <- Tep_c_v_d_raw + xlab("Days After Stress") + ylab("Leaf Area (Pixels)") + theme(legend.position="top")
Tep_c_v_d_raw

OK - now let’s again use smooth splines to reduce the noise in the observed data:

days <- 0:12

temp <- subset(cumm_data_clean, cumm_data_clean$PotNo == unique(cumm_data_clean$PotNo)[1])
temp$Days <- as.numeric(as.character(temp$Days))
temp$Days

## [1]  0  2  4  6  8 10 12

plot.spl <- with(temp, smooth.spline(Days, Area.sum, df = 4))
plot(Area.sum ~ Days, data = temp)
lines(plot.spl, col = "blue")
lines(predict(plot.spl, days), col="red")

keep all of the spline-fitted data:

names <- c(text="POT", "genotype", "treatment", "day", "Area.SUM")
spline_data <- data.frame()
for (k in names) spline_data[[k]] <- as.character()
spline_data

pred_temp <- predict(plot.spl, days)
pred_temp

## $x
##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12
## 
## $y
##  [1]  39226.86  60051.41  82005.75 105776.51 130262.39 153820.81 174431.83
##  [8] 191164.57 207821.70 228171.30 251109.62 274679.84 298384.85

# make empty dataframe
spline_data[1:13,4] <- pred_temp$x
spline_data[1:13,5] <- pred_temp$y
spline_data[1:13,1] <- temp$PotNo[1]
spline_data[1:13,2] <- temp$Genotype[1]
spline_data[1:13,3] <- temp$Treatment[1]

spline_data

final_spline <- spline_data
all_plants <- unique(cumm_data_clean$PotNo)
length(all_plants)

## [1] 36

for(i in 2:36){
  temp <- subset(cumm_data_clean, cumm_data_clean$PotNo == all_plants[i])
  if(dim(temp)[1]>4){
    plot.spl <- with(temp, smooth.spline(Days, Area.sum, df = 4))
    pred_temp <- predict(plot.spl, days)
    spline_data[1:13,4] <- pred_temp$x
    spline_data[1:13,5] <- pred_temp$y
    spline_data[1:13,1] <- temp$PotNo[1]
    spline_data[1:13,2] <- temp$Genotype[1]
    spline_data[1:13,3] <- temp$Treatment[1]
    final_spline <- rbind(final_spline, spline_data)
  }
  else{
    spline_data[1:13,4] <- days
    spline_data[1:13,5] <- 0
    spline_data[1:13,1] <- temp$PotNo[1]
    spline_data[1:13,2] <- temp$Genotype[1]
    spline_data[1:13,3] <- temp$Treatment[1]
  }
}

final_spline

###plot spline smoothed leaf area across all reps per genotype

final_spline$Area.SUM <- as.numeric(as.character(final_spline$Area.SUM))
final_spline$day <- factor(final_spline$day, levels = c("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
#bye <- c(45, 70, 35, 15, 12, 25, 21, 64, 55, 44, 69, 53, 52)
#final_spline <- subset(final_spline, !(final_spline$POT %in% bye))

Tep_c_v_d_smooth <- ggplot(data = final_spline, aes(x = day, y = Area.SUM, color = treatment, group = POT)) + theme_classic()
Tep_c_v_d_smooth <- Tep_c_v_d_smooth + geom_line(alpha = 0.1)
Tep_c_v_d_smooth <- Tep_c_v_d_smooth + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = treatment), alpha = 0.3)
Tep_c_v_d_smooth <- Tep_c_v_d_smooth + stat_summary(fun = mean, aes(group = treatment), size = 0.9, geom = "line", linetype = "solid")
Tep_c_v_d_smooth <- Tep_c_v_d_smooth + stat_compare_means(aes(group = treatment), label = "p.signif", method = "t.test", hide.ns = T)
Tep_c_v_d_smooth <- Tep_c_v_d_smooth + scale_color_aaas()
Tep_c_v_d_smooth <- Tep_c_v_d_smooth + xlab("Days After Stress") + ylab("Shoot Area (pixels)") + theme(legend.position="top")
Tep_c_v_d_smooth

ggplotly(Tep_c_v_d_smooth)

ok - now let’s calculate the growth rate based on the spline data:

Growth rate calculations

We need to fit growth one plant at the time:

#Subset data to only include days that are after stress (positive)
final_spline

temp <- subset(final_spline, final_spline$POT == 1)
temp

plot(temp$Area.SUM ~as.numeric(as.character(temp$day))) + abline(lm(temp$Area.SUM ~ as.numeric(as.character(temp$day))))

## integer(0)

model <- lm(temp$Area.SUM ~ as.numeric(as.character(temp$day)))
summary(model)

## 
## Call:
## lm(formula = temp$Area.SUM ~ as.numeric(as.character(temp$day)))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4516.4 -2783.4  -471.1  2002.3  6059.5 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         41603.2     1901.2   21.88 2.03e-10 ***
## as.numeric(as.character(temp$day))  21231.6      268.9   78.97  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3627 on 11 degrees of freedom
## Multiple R-squared:  0.9982, Adjusted R-squared:  0.9981 
## F-statistic:  6236 on 1 and 11 DF,  p-value: < 2.2e-16

summary(model)$coefficients[2]

## [1] 21231.61

So now - we need to assign this value to a POT # 01 - Let’s make a new table containing the unique POT numbers of all tepary beans within this experiment:

new_table <- final_spline[,c(1:3)]
new_table

new_table2 <- unique(new_table)
new_table2

new_table2$GrowthRate <- 0
new_table2$GrowthRate[1] <- summary(model)$coefficients[2]
new_table2

Cool - let’s loop it!

for (i in 1:36){
  temp <- subset(final_spline, final_spline$POT == i)
  if(dim(temp)[1]>0){
    model <- lm(temp$Area.SUM ~ as.numeric(as.character(temp$day)))
    new_table2$GrowthRate[i] <- summary(model)$coefficients[2]  
  }else{
    new_table2$GrowthRate[i] <- 0
  }
  
}

new_table2 <- subset(new_table2, new_table2$GrowthRate > 0)
new_table2

new_table2$genotype <- gsub("_", ".", new_table2$genotype)
new_table2$genotype <- gsub("TDP22.1", "TDP22", new_table2$genotype)
new_table2$genotype <- gsub("TDP22.2", "TDP22", new_table2$genotype)

Growth_TB_plot <- ggplot(data = new_table2, mapping = aes(x = genotype, y = GrowthRate, colour = genotype)) 
Growth_TB_plot <- Growth_TB_plot + geom_beeswarm(alpha=0.6, priority = "density")
Growth_TB_plot <- Growth_TB_plot + facet_wrap(~ treatment) + theme_classic()
Growth_TB_plot <- Growth_TB_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
Growth_TB_plot <- Growth_TB_plot + theme(legend.position = "none")
Growth_TB_plot <- Growth_TB_plot + xlab("") + ylab("Growth Rate (Pix / day)")
Growth_TB_plot <- Growth_TB_plot + theme(axis.text.x = element_text(angle=90, vjust=0.5))
Growth_TB_plot <- Growth_TB_plot + stat_compare_means(method = "aov", label.y = 19e+4) + scale_color_jco()  
Growth_TB_plot

FW vs. Projected Area comparison

fw <- read_excel("Fw_dw_April2022.xlsx")
colnames(fw)[1] <- "PotNo"
fw

Now - let’s match it to the last day of observed shoot area:

for_fw <- subset(cumm_data_clean, cumm_data_clean$Days == 12)
for_fw

fw <- fw[,c(1, 6)]
fwf <- merge(fw, for_fw, by = "PotNo")
fwf <- subset(fwf, fwf$FW < 20)
fwf

FW_Area_TB <- ggscatter(fwf, x = "Area.sum", y = "FW", color = "Treatment", rug = TRUE) + stat_cor() + scale_color_aaas()   + theme(legend.position = "none") + xlab("Shoot Area (pixels)") + ylab("Fresh Weigh (g)")
FW_Area_TB

Evapotranspiration data:

EVT <- read_xlsx("EVT_April2022.xlsx")
EVT

##### Restructuring the data####
colnames(EVT)

##  [1] "Pot_number" "Genotype"   "Treatment"  "Day1"       "Day2"      
##  [6] "Day3"       "Day4"       "Day5"       "Day6"       "Day7"      
## [11] "Day8"       "Day9"       "Day10"      "Day11"      "Day12"     
## [16] "Day13"

EVT <- as.data.frame(EVT)
April_EVT <- melt(EVT, id.vars = c("Pot_number", "Genotype", "Treatment"), na.rm = F)

colnames(April_EVT)[4] <- "Day"
colnames(April_EVT)[5] <- "Evapotranspiration"
April_EVT$Day <- gsub("Day", "", April_EVT$Day)
April_EVT$Day <- as.numeric(April_EVT$Day)
April_EVT

##### Ordering the levels ####
April_EVT$Treatment <- factor(April_EVT$Treatment, levels = c('Control','10% WHC','5% WHC'))
April_EVT$Pot_number <- as.factor(April_EVT$Pot_number)
April_EVT <- subset(April_EVT, April_EVT$Day != 7)
April_EVT <- subset(April_EVT, April_EVT$Day != 8)

##### Evapotranspiration calculated over time ######
EVT_over_time <- ggplot(data = April_EVT, aes(x = Day, y = Evapotranspiration, id = Pot_number, colour = Genotype), na.rm = TRUE)
EVT_over_time <- EVT_over_time + geom_line(alpha = 0.1)
EVT_over_time <- EVT_over_time + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Genotype), alpha = 0.3) + stat_summary(fun.y = mean, aes(group = Genotype), size = 0.7, geom = "line", linetype = "dashed")
EVT_over_time <- EVT_over_time + facet_wrap(~ Treatment)
EVT_over_time <- EVT_over_time + ylab("Evapotranspiration") + xlab("Day") + rremove("legend")
ggplotly(EVT_over_time)

## Warning: Removed 26 rows containing non-finite values (`stat_summary()`).
## Removed 26 rows containing non-finite values (`stat_summary()`).

ok - now again- let’s remove all the data from 5% WHC and calculate median EVT for each plant:

EVT <- read_excel("EVT_April2022_MMJ.xlsx")
EVT$Genotype <- gsub("TDP22_1", "TDP22", EVT$Genotype)
EVT$Genotype <- gsub("TDP22_2", "TDP22", EVT$Genotype)
EVT$Treatment <- gsub("10% WHC", "Drought", EVT$Treatment)
EVT

EVT$Treatment <- factor(EVT$Treatment, levels = c("Control", "Drought"))
Evt_TB_plot <- ggplot(data = EVT, mapping = aes(x = Genotype, y = EVT.med, colour = Genotype)) 
Evt_TB_plot <- Evt_TB_plot + geom_beeswarm(alpha=0.6, priority = "density")
Evt_TB_plot <- Evt_TB_plot + facet_wrap(~ Treatment) + theme_classic()
Evt_TB_plot <- Evt_TB_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
Evt_TB_plot <- Evt_TB_plot + theme(legend.position = "none")
Evt_TB_plot <- Evt_TB_plot + xlab("") + ylab("Evapotranspiration Rate (g H2O / day)")
Evt_TB_plot <- Evt_TB_plot + theme(axis.text.x = element_text(angle=90, vjust=0.5))
Evt_TB_plot <- Evt_TB_plot + stat_compare_means(method = "aov", label.y = 90)
#Evt_TB_plot <- Evt_TB_plot + stat_pvalue_manual(test, label = "Tukey", y.position = 110) + scale_color_jco()
Evt_TB_plot

making figure:

now that we have all the graphs - let’s assemble them into figure:

pdf("Fig.Cowpea_and_Tepary_bean_pilot.pdf", width = 10, height = 15)
plot_grid(control_v_drought_area, Tep_c_v_d_smooth,
          FW_Area_CP, FW_Area_TB,
          Growth_CP_plot, Growth_TB_plot, 
          Evt_CP_plot, Evt_TB_plot, ncol=2, labels = "AUTO")
dev.off()

## quartz_off_screen 
##                 2

Cowpea and Tepary bean pilot experiments

Overview

Cowpea experiment

Smoothinng the data using splines

Growth rate calculations

Plot evapotranspiration rate

Load and organize the data

Growth rate calculations

FW vs. Projected Area comparison

Evapotranspiration data:

making figure: