The experiment is aimed to test the growth rate (GR) and total leaf area variation of Arabidopsis mutant allele selection based on top candidate genes using GWAS output for the data collected over 360 Arabidopsis accessions in HapMap population. This markdown file includes the first batch (from 2022-11-09 to 2022-11-18) with 8 Raspi PhenoRigs and 16 cameras.

load packages

library(ggplot2)
library(tidyr)
library(dplyr)
library(ggpubr)
library(tidyverse)
library(reshape2)
library(corrplot)
library(plotly)
library(cowplot)
library(npreg)
library(ggformula)

Load batch 1 experiment

#import list containing samples
sample.list1 <- read.table("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript10/Experiments/At_mutant/Batch_1/Results/At_mutant_set1.list", sep ="\t", header = F)

### import each samples
for (i in 1:nrow(sample.list1)){
  sample.df <- read.csv(paste0("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript10/Experiments/At_mutant/Batch_1/Results/",sample.list1[i,1]), header = T)
  assign(sample.list1[i,1], sample.df)
}

#Create Meta phenotype sheet by combining multiple files
Set1_list <- lapply(ls(pattern = "At_mutant_set1.result-single-value"), get)
Raspi_At_mutant1 <- bind_rows(Set1_list)

Reformat the Plant_individual data spreadsheet

Raspi <- Raspi_At_mutant1

### clean the file contatning plantID and plant are information
Raspi_clean <- Raspi[,c(31,18:30)]
Raspi_clean <- Raspi_clean %>% separate(plantID, c("Raspi.ID","Cam.ID", "Year", "Month", "Date","Hour", "Minute", "Second", "PlantID"))

# Fisrt - make sure that EVERYTHING is as numeric
numeric_list <- c("Year", "Month", "Date", "Hour", "Minute", "Second")
for (num in numeric_list){
  Raspi_clean [, num] <- as.numeric(as.character(Raspi_clean [, num]))
}

Raspi_clean <- Raspi_clean[
  order(Raspi_clean[,"Raspi.ID"], 
        Raspi_clean[,"Cam.ID"], 
        Raspi_clean[,"Year"], 
        Raspi_clean[,"Month"], 
        Raspi_clean[,"Date"], 
        Raspi_clean[,"Hour"], 
        Raspi_clean[,"Minute"], 
        Raspi_clean[,"PlantID"]),]

Raspi_clean$ID <- paste(Raspi_clean$Raspi.ID, Raspi_clean$Cam.ID, Raspi_clean$PlantID, sep="_")

Calculate the accumulative minutes for each plant

# Transform all timestamps into minutes (where we have to also integrate the month):
Raspi_clean$month.min <- (Raspi_clean[,"Month"] - Raspi_clean[1,"Month"])*31*24*60
Raspi_clean$day.min <- (Raspi_clean[,"Date"]- Raspi_clean[1,"Date"])*24*60
Raspi_clean$hour.min <- (Raspi_clean[,"Hour"]-Raspi_clean[1,"Hour"])*60

### calculate the total minutes
Raspi_clean$all.min <- Raspi_clean$month.min + Raspi_clean$day.min + Raspi_clean$hour.min  + Raspi_clean$Minute
tail(Raspi_clean)
Raspi_clean

Load decoding file

Note: decoding file should be in a correct column settings:

1: Raspi
2: Camera
3: Position
4: Treatment

###load decoding information
decoding <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript10/Experiments/At_mutant/Batch_1/decoding_set1.csv", header = T)
decoding$ID <- paste(decoding[,"Raspi"], decoding[,"Camera"], decoding[,"position"], sep="_")

### check if decoding information matched the plant ID 
decoding$ID %in% unique(Raspi_clean$ID)
##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [256] TRUE
### merge decode information with phenotype dataset
deco_data <- na.omit(right_join(Raspi_clean, decoding, by = "ID"))
write.csv(deco_data,
          "/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript10/Experiments/At_mutant/Batch_1/deco_data_batch1.csv", row.names = F)

Remove plants with trivial growth through entire growth session

### remove data points associated with no plants
deco_data <- deco_data[deco_data$area > 1000,]

### define a cutoff of plant pixel ranges
Plant_pixel_limit <- 5000

### remove the plant with limited growth
Plant_individual <- unique(deco_data[,"ID"])
length(unique(deco_data[,"ID"]))
## [1] 254
deco_data$Range <- "fill"

for (i in 1:length(Plant_individual)){
  temp <- deco_data[deco_data$ID == Plant_individual[i],]
  ### calculate the range of area 
  Plant_pixel_range <- (max(temp$area)-min(temp$area))
  if(Plant_pixel_range < Plant_pixel_limit){
  ### Mark plants to be removed
   deco_data[deco_data$ID == Plant_individual[i],"Range"] <- "FALSE"
  } else {
  ### Mark plants to be retained
  deco_data[deco_data$ID == Plant_individual[i],"Range"] <- "TRUE"
  }
}

### remove small plants
deco_data_clean <- deco_data[deco_data$Range == "TRUE",] 
  
### Check numbers of plants been removed
length(unique(deco_data_clean[,"ID"]))
## [1] 253
### remove time point at the final stage
#deco_plot <- deco_data_clean[deco_data_clean$all.min < 10080,]
deco_plot <- deco_data_clean[deco_data_clean$all.min < 7800,]

First glance of all plants

rig_list <- c("raspiL", "raspiM", "raspiU","raspiT","raspiR", "raspiK", "raspiS", "raspiV" )

for (i in 1:length(rig_list)){
  temp <- deco_plot[deco_plot $Raspi.ID == rig_list[i],]
  
    ### Plot clean deco_data
    temp_plot <- ggplot(temp, aes(x= all.min, y=area, group = ID, color = Genotype)) +
      geom_line(alpha = 0.2) +
      geom_point(alpha = 0.2, size = 0.2) +
      facet_wrap( ~ Treatment) +
      ylab("Rosette Area of individual plant") +
      xlab("Total Time (minutes)")+
      stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Treatment), alpha=0.3)+
        ### Plot stats summary of the average data
      stat_summary(fun=mean, aes(group=Treatment),  size=0.7, geom="line", linetype = "dashed") +
      theme_classic() +
      scale_x_continuous(breaks=seq(0,max(deco_plot$area),by=2500)) +
      scale_y_continuous(breaks=seq(0,max(deco_plot$area),by=100000)) 
      #print(ggplotly(temp_plot))
      assign(paste0(rig_list[i],"_graph"),temp_plot)
}

Plot results at the first glance before curating data

Remove_list <- c("raspiV_cameraA_3","raspiV_cameraB_11","raspiV_cameraB_13","raspiV_cameraB_14",
                 "raspiV_cameraB_3","raspiV_cameraB_15",
                 "raspiS_cameraB_12","raspiS_cameraB_4","raspiS_cameraA_6",
                 "raspiK_cameraB_9","raspiK_cameraB_9","raspiK_cameraA_15","raspiK_cameraA_9",
                 "raspiK_cameraA_3","raspiK_cameraA_7","raspiK_cameraB_15","raspiK_cameraB_12",
                 "raspiR_cameraB_3","raspiR_cameraA_15","raspiR_cameraA_11","raspiR_cameraB_5",
                 "raspiR_cameraB_8","raspiR_cameraA_2","raspiR_cameraA_9","raspiR_cameraB_13",
                 "raspiR_cameraA_13","raspiR_cameraA_7","raspiR_cameraA_5","raspiR_cameraB_10",
                 "raspiT_cameraA_3","raspiT_cameraB_13","raspiT_cameraA_11","raspiT_cameraA_10",
                 "raspiT_cameraB_15","raspiT_cameraB_15","raspiT_cameraA_7","raspiT_cameraA_6",
                 "raspiT_cameraB_6","raspiT_cameraB_11",
                 "raspiU_cameraB_7","raspiU_cameraB_3","raspiU_cameraA_13","raspiU_cameraB_2",
                 "raspiU_cameraB_14","raspiU_cameraB_6","raspiU_cameraB_13","raspiU_cameraA_3",
                 "raspiU_cameraB_15","raspiU_cameraA_10","raspiU_cameraA_1","raspiU_cameraB_11",
                 "raspiU_cameraA_14","raspiU_cameraA_0",
                 "raspiM_cameraA_12","raspiM_cameraB_12","raspiM_cameraB_14","raspiM_cameraA_15",
                 "raspiM_cameraA_5","raspiM_cameraB_9","raspiM_cameraB_7",
                 "raspiL_cameraA_8","raspiL_cameraB_5","raspiL_cameraA_3","raspiL_cameraA_14",
                 "raspiL_cameraB_15","raspiL_cameraB_13","raspiL_cameraA_11","raspiL_cameraB_7",
                 "raspiL_cameraB_14","raspiL_cameraB_8","raspiL_cameraA_12","raspiL_cameraB_12",
                 "raspiM_cameraB_3","raspiL_cameraB_14","raspiM_cameraA_0"
                 )


deco_plot_clean  <- deco_plot[! deco_plot $ID %in% Remove_list,]


for (i in 1:length(rig_list)){
  temp <- deco_plot_clean[deco_plot_clean$Raspi.ID == rig_list[i],]
  
    ### Plot clean deco_data
    temp_plot <- ggplot(temp, aes(x= all.min, y=area, group = ID, color = Genotype)) +
      geom_line(alpha = 0.2) +
      geom_point(alpha = 0.2, size = 0.2) +
      facet_wrap( ~ Treatment) +
      ylab("Rosette Area of individual plant") +
      xlab("Total Time (minutes)")+
      stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Treatment), alpha=0.3)+
        ### Plot stats summary of the average data
      stat_summary(fun=mean, aes(group=Treatment),  size=0.7, geom="line", linetype = "dashed") +
      theme_classic() 

      
    print(ggplotly(temp_plot))
    assign(paste0(rig_list[i],"_graph"),temp_plot)
}

raspiL_graph

raspiM_graph

raspiU_graph

raspiT_graph

raspiR_graph

raspiK_graph

raspiS_graph

raspiV_graph

Plot for all plants under the drought and control condition

### Plot clean deco_data
clean_graph <- ggplot(deco_plot_clean, aes(x= all.min, y=area, group = ID, color = Treatment)) +
  geom_line(alpha = 0.2) +
  geom_point(alpha = 0.2, size = 0.2) +
  facet_wrap( ~ Genotype) +
  ylab("Rosette Area of individual plant") +
  xlab("Total Time (minutes)")+
  stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Treatment), alpha=0.3)+
    ### Plot stats summary of the average data
  stat_summary(fun=mean, aes(group=Treatment),  size=0.7, geom="line", linetype = "dashed") +
  theme_classic() +
  scale_color_manual(values = c(Drought="tomato", Control="steelblue"))

clean_graph

Use smooth.spline to to curate the dataset

### import data
test_data <- deco_plot_clean
Plant_individual <- unique(deco_plot_clean[,"ID"])

##Parameter settings
Range1 = 1
Range2 = 1.5
Range3 = 2
Range4 = 3

nknots = 5
Fit_night = "Yes"


### non-linear model with smooth.spline (ss function)
for (i in 1:length(Plant_individual)){
  temp <- test_data[test_data$ID == Plant_individual[i], ]
  mod.ss <- ss(temp$all.min, temp$area, nknots = nknots)
  mod.sum <- summary(mod.ss)
  temp$sigma1 <- Range1 * mod.sum$sigma
  temp$sigma2 <- Range2 * mod.sum$sigma
  temp$sigma3 <- Range3 * mod.sum$sigma
  temp$sigma4 <- Range4 * mod.sum$sigma
  temp$residual <- abs(mod.sum$residuals)
  temp_clean <- temp[temp$residual < temp$sigma2,]

  ### Regression model before removal of outliers
  Predict_matrix1 <- predict(mod.ss, temp$all.min)
  P1 <- ggplot(temp, aes(x=all.min, y=area) ) + 
    geom_point(aes(y = area), size=1.5, shape = 21) +
    geom_spline(aes(x = all.min, y = area), nknots = nknots, size=1, color = "blue") +
    geom_ribbon(aes(ymin = Predict_matrix1$y - unique(temp$sigma1), 
                    ymax = Predict_matrix1$y + unique(temp$sigma1)), alpha = 0.2, fill = "#542788") +
    theme_classic() +
    xlab("Total Time (minutes)") +
    ylab(paste0("Leaf area of ",Plant_individual[i]))
  
  ### plot after removal of outliers
  Predict_matrix2 <- predict(mod.ss, temp_clean$all.min)
  P2 <- ggplot(temp_clean, aes(x=all.min, y=area) ) + 
    geom_point(aes(y = area), size=1.5, shape = 21) +
    geom_spline(aes(x = all.min, y = area), nknots = nknots, size=1, color = "blue") +
    geom_ribbon(aes(ymin = Predict_matrix2$y - unique(temp$sigma2), 
                    ymax = Predict_matrix2$y + unique(temp$sigma2)), alpha = 0.2, fill = "#8073AC") +
    theme_classic() +
    xlab("Total Time (minutes)") +
    ylab(paste0("Leaf area of ",Plant_individual[i]))
  
  ### Combine and output the plots 
  #print(plot_grid(P1, P2, labels = c('A', 'B'), label_size = 12))
    
  
  if (Fit_night == "Yes"){
    ### Generate the timpoint during night
    timeline <- seq(0, max(temp$all.min), 30)
    Predict_matrix <- predict(mod.ss, timeline)
    Predict_matrix$ID <- unique(temp$ID)
    colnames(Predict_matrix) <- c("all.min", "Fit", "se", "ID")
    
    ### Extract all predicted values across time-point (Using the temp file)
    smooth_matrix <- predict(mod.ss, temp$all.min)
    temp$Fit <- smooth_matrix$y
    
  } else {
    ### Extract all predicted values across time-point (Using the temp file)
    smooth_matrix <- predict(mod.ss, temp$all.min)
    temp$Fit <- smooth_matrix$y
  }
    ### export clean data-sheet (without night interval fit)
    assign(paste0("Smooth_raw", Plant_individual[i]), temp_clean)
    assign(paste0("Smooth_fit", Plant_individual[i]), temp)
    
    ### export clean data-sheet (with night interval fit)
    assign(paste0("Smooth_night", Plant_individual[i]), Predict_matrix)
}

###Combined the curated data using raw data for plot 
Smooth_list_raw <- lapply(ls(pattern = "Smooth_raw"), get)
deco_Smooth_raw <- bind_rows(Smooth_list_raw )

###Combined the curated data using fitted data for plot (without night point)
Smooth_list_fit <- lapply(ls(pattern = "Smooth_fit"), get)
deco_Smooth_fit <- bind_rows(Smooth_list_fit)
deco_Smooth_fit
###Combined the curated data using fitted data for plot (with night point)
Smooth_list_night_fit <- lapply(ls(pattern = "Smooth_night"), get)
deco_Smooth_night_fit <- bind_rows(Smooth_list_night_fit)
deco_Smooth_night_fit
write.csv(deco_Smooth_fit,"/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript10/Experiments/At_mutant/Batch_1/Raspi_At_mutant_smoothspline.csv")

plot the predicted data derived from curation process

### Replot the curation dataset
mydata=deco_Smooth_fit

curation_graph <- ggplot(data=mydata, aes(x= all.min, y=Fit, group = ID, color = Treatment)) + 
  theme_classic() +
  geom_line(alpha = 0.2) +
  geom_point(alpha = 0.1, size = 0.5) +
  ylab("Smoothed rosette area of individual plant") + xlab("Total Time (minutes)") +
  stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Treatment), alpha=0.3)  +
  stat_summary(fun=mean, aes(group= Treatment),  size=0.7, geom="line", linetype = "dashed") +
  stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test") +
  scale_x_continuous(breaks=seq(0,max(deco_data_clean$area),by=2500)) +
  facet_wrap( ~ Genotype) +
  scale_color_manual(values = c(Drought="tomato", Control="steelblue"))

curation_graph 

Pairwise comparison relative to the Col-0 genotype under the control condition

### extract the accession information
Genotype <- unique(deco_Smooth_fit$Genotype)
Genotype <- Genotype[Genotype!= "Col-0"]

### extract the control dataset
deco_Smooth_fit_control <- deco_Smooth_fit[deco_Smooth_fit$Treatment == "Control",]

for(i in 1:length(Genotype)){
  smooth_temp <- deco_Smooth_fit_control[deco_Smooth_fit_control$Genotype == "Col-0" | deco_Smooth_fit_control$Genotype == Genotype[i],]
  temp_graph <- ggplot(data=smooth_temp, aes(x= all.min, y=Fit, group = ID, color = Genotype)) + 
                theme_classic() +
                geom_line(alpha = 0.2) +
                geom_point(alpha = 0.1, size = 0.5) +
                ylab("Smoothed rosette area of individual plant (Control)") + xlab("Total Time (minutes)") +
                stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Genotype), alpha=0.3)  +
                stat_summary(fun=mean, aes(group= Genotype),  size=0.7, geom="line", linetype = "dashed") +
                stat_compare_means(aes(group = Genotype), label = "p.signif", method = "t.test") +
                scale_x_continuous(breaks=seq(0,max(deco_data_clean$area),by=2500)) +
                scale_color_manual(values = c("#666666","#E7298A"))
  assign(paste0("Graph_Control_",str_replace(Genotype[i], "-","_")),temp_graph)
}


plot_grid(Graph_Control_CP.GR4_2, Graph_Control_CP.EVT2_1, Graph_Control_CP.NPQ1_1, Graph_Control_CP.EVT5_1, 
          Graph_Control_CP.NPQ6_4, Graph_Control_CP.EVT9_2, Graph_Control_CP.EVT6_2, Graph_Control_CP.NPQ4_1, 
          Graph_Control_CP.EVT4_1, Graph_Control_CP.NPQ5_2, Graph_Control_CP.EVT4_2, Graph_Control_CP.NPQ7_1, 
          Graph_Control_CP.EVT7, Graph_Control_CP.EVT1_1, Graph_Control_CP.GR2_2, Graph_Control_CP.GR5,
          Graph_Control_CP.GR4_1, Graph_Control_CP.GR3_1, Graph_Control_CP.EVT3_1, Graph_Control_CP.EVT2_2,
          Graph_Control_CP.NPQ6_5, Graph_Control_CP.GR2_1, Graph_Control_CP.NPQ6_3, Graph_Control_CP.NPQ1_2,
          Graph_Control_CP.GR2_3, Graph_Control_CP.GR3_2,Graph_Control_CP.NPQ5_1, Graph_Control_CP.EVT8, Graph_Control_CP.EVT9_1,
          Graph_Control_CP.EVT1_2)

### Pairwise comparison relative to the Col-0 genotype under the Drought condition

### extract the drought dataset
deco_Smooth_fit_drought <- deco_Smooth_fit[deco_Smooth_fit$Treatment == "Drought",]

for(i in 1:length(Genotype)){
  smooth_temp <- deco_Smooth_fit_drought[deco_Smooth_fit_drought$Genotype == "Col-0" | deco_Smooth_fit_drought$Genotype == Genotype[i],]
  temp_graph <- ggplot(data=smooth_temp, aes(x= all.min, y=Fit, group = ID, color = Genotype)) + 
                theme_classic() +
                geom_line(alpha = 0.2) +
                geom_point(alpha = 0.1, size = 0.5) +
                ylab("Smoothed rosette area of individual plant (Drought)") + xlab("Total Time (minutes)") +
                stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Genotype), alpha=0.3)  +
                stat_summary(fun=mean, aes(group= Genotype),  size=0.7, geom="line", linetype = "dashed") +
                stat_compare_means(aes(group = Genotype), label = "p.signif", method = "t.test") +
                scale_x_continuous(breaks=seq(0,max(deco_data_clean$area),by=2500)) +
                scale_color_manual(values = c("#666666","#E7298A"))
  assign(paste0("Graph_Drought_",str_replace(Genotype[i], "-","_")),temp_graph)
}


plot_grid(Graph_Drought_CP.GR4_2, Graph_Drought_CP.EVT2_1, Graph_Drought_CP.NPQ1_1, Graph_Drought_CP.EVT5_1, 
          Graph_Drought_CP.NPQ6_4, Graph_Drought_CP.EVT9_2, Graph_Drought_CP.EVT6_2, Graph_Drought_CP.NPQ4_1, 
          Graph_Drought_CP.EVT4_1, Graph_Drought_CP.NPQ5_2, Graph_Drought_CP.EVT4_2, Graph_Drought_CP.NPQ7_1, 
          Graph_Drought_CP.EVT7, Graph_Drought_CP.EVT1_1, Graph_Drought_CP.GR2_2, Graph_Drought_CP.GR5,
          Graph_Drought_CP.GR4_1, Graph_Drought_CP.GR3_1, Graph_Drought_CP.EVT3_1, Graph_Drought_CP.EVT2_2,
          Graph_Drought_CP.NPQ6_5, Graph_Drought_CP.GR2_1, Graph_Drought_CP.NPQ6_3, Graph_Drought_CP.NPQ1_2,
          Graph_Drought_CP.GR2_3, Graph_Drought_CP.GR3_2,Graph_Drought_CP.NPQ5_1, Graph_Drought_CP.EVT8, Graph_Drought_CP.EVT9_1,
          Graph_Drought_CP.EVT1_2)

Calcualte the growth rate overtime using a sliding window

window size defined by number of minutes (unit: Minutes)
step size by minutes (unit: Minutes)

### define numbers of hour in a certain interval
window_size <- 300
step_size <- 300
data_source <- deco_Smooth_night_fit

Calcualte the growth rate (GR) under the certain window size and step

### define interval using the numbers of hours and total minutes of experiments
timeline <- seq(0, max(data_source$all.min), step_size)
Plant_individual_clean <- Plant_individual

### Calculate the plant-wise growth rate (GR) across defined window above
for (NUMBER in 1:length(Plant_individual_clean)){
  ### Subset data for each Plant
  Sub_plant <- na.omit((data_source[data_source$ID == Plant_individual_clean[NUMBER], ]))
    
  if (nrow(Sub_plant) != 0) {
    ### Create plant-wise statistical table
    Sub_plant_stats <- data.frame(matrix(ncol = 5 , nrow = length(timeline)))
    colnames(Sub_plant_stats) <- c("Starttime","Endtime", "Intercept","Slope",  "R.square")
    Sub_plant_stats$Starttime <- timeline
    Sub_plant_stats$Endtime <- timeline + window_size
    
    for (i in 1:length(timeline)){
    ## Subset data for each window under the same plant
    Window <- na.omit(Sub_plant[Sub_plant$all.min > Sub_plant_stats[i,1] & Sub_plant$all.min <= Sub_plant_stats[i,2],])
    
      if (nrow(Window) >= 3){
      ### generate the linear model
      linear_model <- lm(Window$Fit~ Window$all.min)
      linear_summary <- summary(linear_model)
      
      ### extract intercept
      Sub_plant_stats[i,4] <- linear_summary$coefficients[2]
      ### extract the slope
      Sub_plant_stats[i,3] <- linear_summary$coefficients[1]
      ### extract the R.square
      Sub_plant_stats[i,5] <- linear_summary$r.squared
        
      ### Plot the linear regression fit
      ggplot(Window, aes(x = all.min, y = area)) +
        geom_point() + theme_classic() +
        geom_smooth(method = "lm", alpha = .15)
      } else {
      Sub_plant_stats[i,4] <- NA
      ### extract the slope
      Sub_plant_stats[i,3] <- NA
      ### extract the R.square
      Sub_plant_stats[i,5] <- NA
        
      }
    }
  Sub_plant_stats$ID <- Plant_individual_clean[NUMBER]
  assign(paste0(Plant_individual_clean[NUMBER], "_GR_summary"), Sub_plant_stats)
  }
}

### Combine GR for all plants 
GR_list <- lapply(ls(pattern = "_GR_summary"), get)
Total_GR <- na.omit(bind_rows(GR_list))

Plot the GR for each accession with windows and step specified

### join decoding file and the total_GR file
growth_data <- left_join(Total_GR, decoding, by = "ID")
growth_data <- growth_data[growth_data$Slope > 0,]
growth_data$Point <- (growth_data$Starttime + growth_data$Endtime)/2

GR_lgraph <- ggplot(data=growth_data, aes(x= Point, y=Slope, group = ID, color = Treatment)) + 
            geom_line(alpha = 0.2)  +
            stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group= Treatment), alpha=0.3) +
            stat_summary(fun=mean, aes(group= Treatment),  size=0.7, geom="line", linetype = "dashed") +
            stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = T) +
            ylab(paste0("Growth Rate under ",window_size,"window_size")) + xlab("Minutes After Experiments") +
            theme_classic() +
            facet_wrap( ~ Genotype) +
            scale_color_manual(values = c(Drought="tomato", Control="steelblue"))

GR_lgraph

Calcualte the daily growth rate (DGR)

### determine the input data
data_source <- deco_Smooth_fit
Plant_individual_clean <- unique(deco_Smooth_fit$ID) 

### define interval using the numbers of hours and total minutes of experiments
timeline <- unique(data_source[,c("Year", "Month", "Date")])

### Calculate the plant-wise growth rate (GR) across defined window above
for (NUMBER in 1:length(Plant_individual_clean)){
  ### Subset data for each Plant
  Sub_plant <- na.omit((data_source[data_source$ID == Plant_individual_clean[NUMBER], ]))
    
  if (nrow(Sub_plant) != 0) {
    ### Create plant-wise statistical table
    Sub_plant_stats <- data.frame(matrix(ncol = 7 , nrow = nrow(timeline)))
    colnames(Sub_plant_stats) <- c("Year","Month","Date","Intercept",  "Slope","R.square", "Index")
    Sub_plant_stats[,1:3] <- timeline[,1:3]
    Sub_plant_stats$Index <- rownames(Sub_plant_stats)
    
    for (i in 1:nrow(timeline)){
    ## Subset data for each window under the same plant
    Window <- na.omit(Sub_plant[Sub_plant$Year == timeline[i,1] & Sub_plant$Month == timeline[i,2] & Sub_plant$Date == timeline[i,3],])
    
      if (nrow(Window) >= 3){
      ### generate the linear model
      linear_model <- lm(Window$Fit ~ Window$all.min)
      linear_summary <- summary(linear_model)
      
      ### extract intercept
      Sub_plant_stats[i,4] <- linear_summary$coefficients[1]
      ### extract the slope
      Sub_plant_stats[i,5] <- linear_summary$coefficients[2]
      ### extract the R.square
      Sub_plant_stats[i,6] <- linear_summary$r.squared

      } else {
      Sub_plant_stats[i,5] <- NA
      ### extract the slope
      Sub_plant_stats[i,4] <- NA
      ### extract the R.square
      Sub_plant_stats[i,6] <- NA
        
      }
    }
  Sub_plant_stats$ID <- Plant_individual_clean[NUMBER]
  assign(paste0(Plant_individual_clean[NUMBER], "_DGR_summary"), Sub_plant_stats)
  }
}

### Combine GR for all plants 
DGR_list <- lapply(ls(pattern = "_DGR_summary"), get)
Total_DGR <- na.omit(bind_rows(DGR_list))

Plot the daily growth rate for each accession (DGR)

### join decoding file and the total_GR file
growth_data <- left_join(Total_DGR , decoding, by = "ID")
growth_data <- growth_data[growth_data$Slope < 40,]
growth_data$Index <- as.integer(growth_data$Index)

### Plot DGR
DGR_lgraph <- ggplot(data=growth_data, aes(x= Index, y=Slope, group = ID, color = Treatment)) + 
              theme_classic() +
              geom_line(alpha = 0.2) +
              geom_point(alpha = 0.1, size = 0.5) +
              ylab("Smoothed rosette area of individual plant") + xlab("Total Time (minutes)") +
              stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Treatment), alpha=0.3)  +
              stat_summary(fun=mean, aes(group= Treatment),  size=0.7, geom="line", linetype = "dashed") +
              stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test") +
              scale_x_continuous(breaks=seq(0,max(deco_data_clean$area),by=2500)) +
              facet_wrap( ~ Genotype) +
              scale_color_manual(values = c(Drought="tomato", Control="steelblue"))
DGR_lgraph 

Reformat the DGR table for pairwise comparison

### join decoding file and the total_GR file
growth_data <- left_join(Total_DGR , decoding, by = "ID")
growth_data <- growth_data[growth_data$Slope < 40,]
growth_data$Index <- as.integer(growth_data$Index)

Pairwise comparison of DGR relative to the Col-0 genotype under the control condition

### extract the drought dataset
growth_data_control <- growth_data[growth_data$Treatment == "Control",]

for(i in 1:length(Genotype)){
  DGR_temp <- growth_data_control[growth_data_control$Genotype == "Col-0" | growth_data_control$Genotype == Genotype[i],]
  tempDGR_graph <- ggplot(data=DGR_temp, aes(x= Index, y=Slope, group = ID, color = Genotype)) + 
                theme_classic() +
                geom_line(alpha = 0.2) +
                geom_point(alpha = 0.1, size = 0.5) +
                ylab("DGR of individual plant (Control)") + xlab("Total Time (days)") +
                stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Genotype), alpha=0.3)  +
                stat_summary(fun=mean, aes(group= Genotype),  size=0.7, geom="line", linetype = "dashed") +
                stat_compare_means(aes(group = Genotype), label = "p.signif", method = "t.test") +
                scale_x_continuous(breaks=seq(0,max(deco_data_clean$area),by=2500)) +
                scale_color_manual(values = c("#666666","#E7298A"))
  assign(paste0("DGR_Control_",str_replace(Genotype[i], "-","_")),tempDGR_graph)
}

plot_grid(DGR_Control_CP.GR4_2, DGR_Control_CP.EVT2_1, DGR_Control_CP.NPQ1_1, DGR_Control_CP.EVT5_1, 
          DGR_Control_CP.NPQ6_4, DGR_Control_CP.EVT9_2, DGR_Control_CP.EVT6_2, DGR_Control_CP.NPQ4_1, 
          DGR_Control_CP.EVT4_1, DGR_Control_CP.NPQ5_2, DGR_Control_CP.EVT4_2, DGR_Control_CP.NPQ7_1, 
          DGR_Control_CP.EVT7, DGR_Control_CP.EVT1_1, DGR_Control_CP.GR2_2, DGR_Control_CP.GR5,
          DGR_Control_CP.GR4_1, DGR_Control_CP.GR3_1, DGR_Control_CP.EVT3_1, DGR_Control_CP.EVT2_2,
          DGR_Control_CP.NPQ6_5, DGR_Control_CP.GR2_1, DGR_Control_CP.NPQ6_3, DGR_Control_CP.NPQ1_2,
          DGR_Control_CP.GR2_3, DGR_Control_CP.GR3_2,DGR_Control_CP.NPQ5_1, DGR_Control_CP.EVT8, DGR_Control_CP.EVT9_1,
          DGR_Control_CP.EVT1_2)

Pairwise comparison of DGR relative to the Col-0 genotype under the drought condition

### extract the drought dataset
growth_data_drought <- growth_data[growth_data$Treatment == "Drought",]

for(i in 1:length(Genotype)){
  DGR_temp <- growth_data_drought[growth_data_drought$Genotype == "Col-0" | growth_data_drought$Genotype == Genotype[i],]
  tempDGR_graph <- ggplot(data=DGR_temp, aes(x= Index, y=Slope, group = ID, color = Genotype)) + 
                theme_classic() +
                geom_line(alpha = 0.2) +
                geom_point(alpha = 0.1, size = 0.5) +
                ylab("DGR of individual plant (Drought)") + xlab("Total Time (days)") +
                stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Genotype), alpha=0.3)  +
                stat_summary(fun=mean, aes(group= Genotype),  size=0.7, geom="line", linetype = "dashed") +
                stat_compare_means(aes(group = Genotype), label = "p.signif", method = "t.test") +
                scale_x_continuous(breaks=seq(0,max(deco_data_clean$area),by=2500)) +
                scale_color_manual(values = c("#666666","#E7298A"))
  assign(paste0("DGR_Drought_",str_replace(Genotype[i], "-","_")),tempDGR_graph)
}

plot_grid(DGR_Drought_CP.GR4_2, DGR_Drought_CP.EVT2_1, DGR_Drought_CP.NPQ1_1, DGR_Drought_CP.EVT5_1, 
          DGR_Drought_CP.NPQ6_4, DGR_Drought_CP.EVT9_2, DGR_Drought_CP.EVT6_2, DGR_Drought_CP.NPQ4_1, 
          DGR_Drought_CP.EVT4_1, DGR_Drought_CP.NPQ5_2, DGR_Drought_CP.EVT4_2, DGR_Drought_CP.NPQ7_1, 
          DGR_Drought_CP.EVT7, DGR_Drought_CP.EVT1_1, DGR_Drought_CP.GR2_2, DGR_Drought_CP.GR5,
          DGR_Drought_CP.GR4_1, DGR_Drought_CP.GR3_1, DGR_Drought_CP.EVT3_1, DGR_Drought_CP.EVT2_2,
          DGR_Drought_CP.NPQ6_5, DGR_Drought_CP.GR2_1, DGR_Drought_CP.NPQ6_3, DGR_Drought_CP.NPQ1_2,
          DGR_Drought_CP.GR2_3, DGR_Drought_CP.GR3_2,DGR_Drought_CP.NPQ5_1, DGR_Drought_CP.EVT8, DGR_Drought_CP.EVT9_1,
          DGR_Drought_CP.EVT1_2)