This notebook is constructed to guide the user through the data analysis and curation of the output generated by high-throughput phenotyping platform at KAUST.

The example datafiles that we use in this workshop are avaialable here.

The data is composed of Arabidopsis plants grown under control and salt stress conditions. The start of the experiment is

Data import

Let’s start by making sure that our data is loaded correctly from the correct folder

# Where is your working directory (wd) at the moment?
getwd()

## [1] "/Users/magdalena/Dropbox/Data and analysis/PSI/BigSalt2"

# Change wd into the location you have your .csv files stored:
setwd("/Users/magdalena/Dropbox/Data and analysis/PSI/BigSalt2/")
list.files()

##  [1] "20191112_PSI_workshop_material.nb.html"
##  [2] "20191112_PSI_workshop_material.Rmd"    
##  [3] "all_growth_clean.pdf"                  
##  [4] "Clean_Growth_factors_and_SIIT.csv"     
##  [5] "Col0 _Area_raw_data.pdf"               
##  [6] "data_export.png"                       
##  [7] "Fc_Plant.csv"                          
##  [8] "FC1"                                   
##  [9] "M1 _Area_raw_data.pdf"                 
## [10] "M11 _Area_raw_data.pdf"                
## [11] "M12 _Area_raw_data.pdf"                
## [12] "M13 _Area_raw_data.pdf"                
## [13] "M14 _Area_raw_data.pdf"                
## [14] "M15 _Area_raw_data.pdf"                
## [15] "M16 _Area_raw_data.pdf"                
## [16] "M17 _Area_raw_data.pdf"                
## [17] "M18 _Area_raw_data.pdf"                
## [18] "M19 _Area_raw_data.pdf"                
## [19] "M2 _Area_raw_data.pdf"                 
## [20] "M20 _Area_raw_data.pdf"                
## [21] "M21 _Area_raw_data.pdf"                
## [22] "M22 _Area_raw_data.pdf"                
## [23] "M23 _Area_raw_data.pdf"                
## [24] "M24 _Area_raw_data.pdf"                
## [25] "M25 _Area_raw_data.pdf"                
## [26] "M26 _Area_raw_data.pdf"                
## [27] "M27 _Area_raw_data.pdf"                
## [28] "M3 _Area_raw_data.pdf"                 
## [29] "M4 _Area_raw_data.pdf"                 
## [30] "M5 _Area_raw_data.pdf"                 
## [31] "M6 _Area_raw_data.pdf"                 
## [32] "M7 _Area_raw_data.pdf"                 
## [33] "M8 _Area_raw_data.pdf"                 
## [34] "M9 _Area_raw_data.pdf"                 
## [35] "Rgb_Color_Plant.csv"                   
## [36] "Rgb_Morpho_Plant.csv"                  
## [37] "RGB2"                                  
## [38] "SC1"                                   
## [39] "Subset1_growth_clean.pdf"              
## [40] "Subset1_growth_SIIT.pdf"

Let’s load individual datasets into R:

morpho_data <- read.csv("Rgb_Morpho_Plant.csv")
color_data <- read.csv("Rgb_Color_Plant.csv")
fc_data <- read.csv("Fc_Plant.csv")

# Have a look at the tables by typing:
head(morpho_data)

Outlier selection

First - let’s subset the FC for the directly measured trait in the light curve protocol:

head(fc_data)

fc_reliable <- subset(fc_data, select = c(Tray.ID, Position, Measuring.Date, Measuring.Time, Tray.Info, Plant.ID, Plant.Info, 
                  Size,Fo, Fm, Fm_Lss1, Fm_Lss2, Fm_Lss3, Fm_Lss4, Fm_Lss5, Fm_Lss6, 
                  Ft_Lss1, Ft_Lss2, Ft_Lss3, Ft_Lss4, Ft_Lss5, Ft_Lss6, 
                  Fo_Lss1, Fo_Lss2, Fo_Lss3, Fo_Lss4, Fo_Lss5, Fo_Lss6))

fc_reliable <- na.omit(fc_reliable)

Then - the assumptions are that the value of Fm > Fo, and that’s how we calculate Fv (dark adapted state)

We also do the same for light adapted state using Fm_LssX > Fo_LssX and Ft_LssX > Fo_LssX

If these asumptions are not met - Fv is replaced with “missing value”

for(i in 1:nrow(fc_reliable)){
  
  if(fc_reliable$Fm[i] > fc_reliable$Fo[i]){
    fc_reliable$Fv[i] <- fc_reliable$Fm[i] - fc_reliable$Fo[i]  
  }
  else{
    fc_reliable$Fv[i] <- NaN
  }
  
  if(fc_reliable$Fo_Lss1[i] > 0){
    if(fc_reliable$Fm_Lss1[i] > fc_reliable$Fo_Lss1[i]){
      fc_reliable$Fv_Lss1[i] <- fc_reliable$Fm_Lss1[i] - fc_reliable$Fo_Lss1[i]  
    }
    else{
      fc_reliable$Fv_Lss1[i] <- NaN
    }}
  else{
    fc_reliable$Fv_Lss1[i] <- NaN
  }
  
  
  if(fc_reliable$Fo_Lss2[i] > 0){
    if(fc_reliable$Fm_Lss2[i] > fc_reliable$Fo_Lss2[i]){
      fc_reliable$Fv_Lss2[i] <- fc_reliable$Fm_Lss2[i] - fc_reliable$Fo_Lss2[i]  
    }
    else{
      fc_reliable$Fv_Lss1[i] <- NaN
    }}
  else{
    fc_reliable$Fv_Lss2[i] <- NaN
  }
  
  
  if(fc_reliable$Fo_Lss3[i] > 0){
    if(fc_reliable$Fm_Lss3[i] > fc_reliable$Fo_Lss3[i]){
      fc_reliable$Fv_Lss3[i] <- fc_reliable$Fm_Lss3[i] - fc_reliable$Fo_Lss3[i]  
    }
    else{
      fc_reliable$Fv_Lss3[i] <- NaN
    }}
  else{
    fc_reliable$Fv_Lss3[i] <- NaN
  }
  
  
  if(fc_reliable$Fo_Lss4[i] > 0){
    if(fc_reliable$Fm_Lss4[i] > fc_reliable$Fo_Lss4[i]){
      fc_reliable$Fv_Lss4[i] <- fc_reliable$Fm_Lss4[i] - fc_reliable$Fo_Lss4[i]  
    }
    else{
      fc_reliable$Fv_Lss4[i] <- NaN
    }}
  else{
    fc_reliable$Fv_Lss4[i] <- NaN
  }
  
  
  if(fc_reliable$Fo_Lss5[i] > 0){
    if(fc_reliable$Fm_Lss5[i] > fc_reliable$Fo_Lss5[i]){
      fc_reliable$Fv_Lss5[i] <- fc_reliable$Fm_Lss5[i] - fc_reliable$Fo_Lss5[i]  
    }
    else{
      fc_reliable$Fv_Lss5[i] <- NaN
    }}
  else{
    fc_reliable$Fv_Lss5[i] <- NaN
  }
  
  
  if(fc_reliable$Fo_Lss6[i] > 0){
    if(fc_reliable$Fm_Lss6[i] > fc_reliable$Fo_Lss6[i]){
      fc_reliable$Fv_Lss6[i] <- fc_reliable$Fm_Lss6[i] - fc_reliable$Fo_Lss6[i]  
    }
    else{
      fc_reliable$Fv_Lss6[i] <- NaN
    }}
  else{
    fc_reliable$Fv_Lss6[i] <- NaN
  }
  
  if(fc_reliable$Ft_Lss1[i] > fc_reliable$Fo_Lss1[i]){
    fc_reliable$qP_Lss1[i] <- (fc_reliable$Fm_Lss1[i] - fc_reliable$Ft_Lss1[i])/(fc_reliable$Fm_Lss1[i]-fc_reliable$Fo_Lss1[i])
  }
  else{
    fc_reliable$qP_Lss1[i] <- NaN
  }
  
  if(fc_reliable$Ft_Lss2[i] > fc_reliable$Fo_Lss2[i]){
    fc_reliable$qP_Lss2[i] <- (fc_reliable$Fm_Lss2[i] - fc_reliable$Ft_Lss2[i])/(fc_reliable$Fm_Lss2[i]-fc_reliable$Fo_Lss2[i])
  }
  else{
    fc_reliable$qP_Lss2[i] <- NaN
  }
  
  if(fc_reliable$Ft_Lss3[i] > fc_reliable$Fo_Lss3[i]){
    fc_reliable$qP_Lss3[i] <- (fc_reliable$Fm_Lss3[i] - fc_reliable$Ft_Lss3[i])/(fc_reliable$Fm_Lss3[i]-fc_reliable$Fo_Lss3[i])
  }
  else{
    fc_reliable$qP_Lss3[i] <- NaN
  }
  
  if(fc_reliable$Ft_Lss4[i] > fc_reliable$Fo_Lss4[i]){
    fc_reliable$qP_Lss4[i] <- (fc_reliable$Fm_Lss4[i] - fc_reliable$Ft_Lss4[i])/(fc_reliable$Fm_Lss4[i]-fc_reliable$Fo_Lss4[i])
  }
  else{
    fc_reliable$qP_Lss4[i] <- NaN
  }
  
  if(fc_reliable$Ft_Lss5[i] > fc_reliable$Fo_Lss5[i]){
    fc_reliable$qP_Lss5[i] <- (fc_reliable$Fm_Lss5[i] - fc_reliable$Ft_Lss5[i])/(fc_reliable$Fm_Lss5[i]-fc_reliable$Fo_Lss5[i])
  }
  else{
    fc_reliable$qP_Lss5[i] <- NaN
  }
  
  if(fc_reliable$Ft_Lss6[i] > fc_reliable$Fo_Lss6[i]){
    fc_reliable$qP_Lss6[i] <- (fc_reliable$Fm_Lss6[i] - fc_reliable$Ft_Lss6[i])/(fc_reliable$Fm_Lss6[i]-fc_reliable$Fo_Lss6[i])
  }
  else{
    fc_reliable$qP_Lss6[i] <- NaN
  }
}

fc_reliable <- na.omit(fc_reliable)

fc_reliable$QY_max <- fc_reliable$Fv / fc_reliable$Fm

fc_reliable$QY_Lss1 <- (fc_reliable$Fm_Lss1 - fc_reliable$Ft_Lss1) / fc_reliable$Fm
fc_reliable$QY_Lss2 <- (fc_reliable$Fm_Lss2 - fc_reliable$Ft_Lss2) / fc_reliable$Fm
fc_reliable$QY_Lss3 <- (fc_reliable$Fm_Lss3 - fc_reliable$Ft_Lss3) / fc_reliable$Fm
fc_reliable$QY_Lss4 <- (fc_reliable$Fm_Lss4 - fc_reliable$Ft_Lss4) / fc_reliable$Fm
fc_reliable$QY_Lss5 <- (fc_reliable$Fm_Lss5 - fc_reliable$Ft_Lss5) / fc_reliable$Fm
fc_reliable$QY_Lss6 <- (fc_reliable$Fm_Lss6 - fc_reliable$Ft_Lss6) / fc_reliable$Fm

fc_reliable$FvFm_Lss1 <- (fc_reliable$Fm_Lss1 - fc_reliable$Fo_Lss1) / fc_reliable$Fm_Lss1
fc_reliable$FvFm_Lss2 <- (fc_reliable$Fm_Lss2 - fc_reliable$Fo_Lss2) / fc_reliable$Fm_Lss2
fc_reliable$FvFm_Lss3 <- (fc_reliable$Fm_Lss3 - fc_reliable$Fo_Lss3) / fc_reliable$Fm_Lss3
fc_reliable$FvFm_Lss4 <- (fc_reliable$Fm_Lss4 - fc_reliable$Fo_Lss4) / fc_reliable$Fm_Lss4
fc_reliable$FvFm_Lss5 <- (fc_reliable$Fm_Lss5 - fc_reliable$Fo_Lss5) / fc_reliable$Fm_Lss5
fc_reliable$FvFm_Lss6 <- (fc_reliable$Fm_Lss6 - fc_reliable$Fo_Lss6) / fc_reliable$Fm_Lss6

fc_reliable$NPQ_Lss1 <- (fc_reliable$Fm - fc_reliable$Fm_Lss1)/fc_reliable$Fm_Lss1
fc_reliable$NPQ_Lss2 <- (fc_reliable$Fm - fc_reliable$Fm_Lss2)/fc_reliable$Fm_Lss2
fc_reliable$NPQ_Lss3 <- (fc_reliable$Fm - fc_reliable$Fm_Lss3)/fc_reliable$Fm_Lss3
fc_reliable$NPQ_Lss4 <- (fc_reliable$Fm - fc_reliable$Fm_Lss4)/fc_reliable$Fm_Lss4
fc_reliable$NPQ_Lss5 <- (fc_reliable$Fm - fc_reliable$Fm_Lss5)/fc_reliable$Fm_Lss5
fc_reliable$NPQ_Lss6 <- (fc_reliable$Fm - fc_reliable$Fm_Lss6)/fc_reliable$Fm_Lss6

fc_reliable$Fq_Lss1 <- fc_reliable$Fm_Lss1 - fc_reliable$Ft_Lss1
fc_reliable$Fq_Lss2 <- fc_reliable$Fm_Lss2 - fc_reliable$Ft_Lss2
fc_reliable$Fq_Lss3 <- fc_reliable$Fm_Lss3 - fc_reliable$Ft_Lss3
fc_reliable$Fq_Lss4 <- fc_reliable$Fm_Lss4 - fc_reliable$Ft_Lss4
fc_reliable$Fq_Lss5 <- fc_reliable$Fm_Lss5 - fc_reliable$Ft_Lss5
fc_reliable$Fq_Lss6 <- fc_reliable$Fm_Lss6 - fc_reliable$Ft_Lss6

Subsequently, what I usually do is to re-calculate the measuring date into the “Days after stress” of “Days of the experiment”. Usually - we need to get that from the date which is stored in collumn called “Measuring.Date” (please note - that depending on the downloaded version this column can be also named “Measuring_Date”)

What we need to do is to convert this column into the numerical values - but to do this - we need to first change it into a character - to be able to split the numbers making up the date:

unique(fc_reliable$Measuring.Date)

##  [1] 10/14/19 10/15/19 10/16/19 10/17/19 10/18/19 10/19/19 10/20/19
##  [8] 10/21/19 10/22/19 10/23/19 10/24/19 10/25/19 10/26/19 10/27/19
## [15] 10/28/19 10/29/19 10/30/19 10/31/19 11/1/19  11/2/19  11/3/19 
## [22] 11/4/19 
## 22 Levels: 10/14/19 10/15/19 10/16/19 10/17/19 10/18/19 ... 11/4/19

fc_reliable$Measuring.Date <- as.character(fc_reliable$Measuring.Date)
head(fc_reliable)

fc_reliable$Measuring.Date <- strsplit(fc_reliable$Measuring.Date, "/")
unique(fc_reliable$Measuring.Date)

## [[1]]
## [1] "10" "14" "19"
## 
## [[2]]
## [1] "10" "15" "19"
## 
## [[3]]
## [1] "10" "16" "19"
## 
## [[4]]
## [1] "10" "17" "19"
## 
## [[5]]
## [1] "10" "18" "19"
## 
## [[6]]
## [1] "10" "19" "19"
## 
## [[7]]
## [1] "10" "20" "19"
## 
## [[8]]
## [1] "10" "21" "19"
## 
## [[9]]
## [1] "10" "22" "19"
## 
## [[10]]
## [1] "10" "23" "19"
## 
## [[11]]
## [1] "10" "24" "19"
## 
## [[12]]
## [1] "10" "25" "19"
## 
## [[13]]
## [1] "10" "26" "19"
## 
## [[14]]
## [1] "10" "27" "19"
## 
## [[15]]
## [1] "10" "28" "19"
## 
## [[16]]
## [1] "10" "29" "19"
## 
## [[17]]
## [1] "10" "30" "19"
## 
## [[18]]
## [1] "10" "31" "19"
## 
## [[19]]
## [1] "11" "1"  "19"
## 
## [[20]]
## [1] "11" "2"  "19"
## 
## [[21]]
## [1] "11" "3"  "19"
## 
## [[22]]
## [1] "11" "4"  "19"

unique(morpho_data$Measuring.Date)

##  [1] 10/14/19 10/15/19 10/16/19 10/17/19 10/18/19 10/19/19 10/20/19
##  [8] 10/21/19 10/22/19 10/23/19 10/24/19 10/25/19 10/26/19 10/27/19
## [15] 10/28/19 10/29/19 10/30/19 10/31/19 11/1/19  11/2/19  11/3/19 
## [22] 11/4/19 
## 22 Levels: 10/14/19 10/15/19 10/16/19 10/17/19 10/18/19 ... 11/4/19

morpho_data$Measuring.Date <- as.character(morpho_data$Measuring.Date)
head(morpho_data)

morpho_data$Measuring.Date <- strsplit(morpho_data$Measuring.Date, "/")
unique(morpho_data$Measuring.Date)

## [[1]]
## [1] "10" "14" "19"
## 
## [[2]]
## [1] "10" "15" "19"
## 
## [[3]]
## [1] "10" "16" "19"
## 
## [[4]]
## [1] "10" "17" "19"
## 
## [[5]]
## [1] "10" "18" "19"
## 
## [[6]]
## [1] "10" "19" "19"
## 
## [[7]]
## [1] "10" "20" "19"
## 
## [[8]]
## [1] "10" "21" "19"
## 
## [[9]]
## [1] "10" "22" "19"
## 
## [[10]]
## [1] "10" "23" "19"
## 
## [[11]]
## [1] "10" "24" "19"
## 
## [[12]]
## [1] "10" "25" "19"
## 
## [[13]]
## [1] "10" "26" "19"
## 
## [[14]]
## [1] "10" "27" "19"
## 
## [[15]]
## [1] "10" "28" "19"
## 
## [[16]]
## [1] "10" "29" "19"
## 
## [[17]]
## [1] "10" "30" "19"
## 
## [[18]]
## [1] "10" "31" "19"
## 
## [[19]]
## [1] "11" "1"  "19"
## 
## [[20]]
## [1] "11" "2"  "19"
## 
## [[21]]
## [1] "11" "3"  "19"
## 
## [[22]]
## [1] "11" "4"  "19"

unique(color_data$Measuring.Date)

##  [1] 10/14/19 10/15/19 10/16/19 10/17/19 10/18/19 10/19/19 10/20/19
##  [8] 10/21/19 10/22/19 10/23/19 10/24/19 10/25/19 10/26/19 10/27/19
## [15] 10/28/19 10/29/19 10/30/19 10/31/19 11/1/19  11/2/19  11/3/19 
## [22] 11/4/19 
## 22 Levels: 10/14/19 10/15/19 10/16/19 10/17/19 10/18/19 ... 11/4/19

color_data$Measuring.Date <- as.character(color_data$Measuring.Date)
head(color_data)

color_data$Measuring.Date <- strsplit(color_data$Measuring.Date, "/")
unique(color_data$Measuring.Date)

## [[1]]
## [1] "10" "14" "19"
## 
## [[2]]
## [1] "10" "15" "19"
## 
## [[3]]
## [1] "10" "16" "19"
## 
## [[4]]
## [1] "10" "17" "19"
## 
## [[5]]
## [1] "10" "18" "19"
## 
## [[6]]
## [1] "10" "19" "19"
## 
## [[7]]
## [1] "10" "20" "19"
## 
## [[8]]
## [1] "10" "21" "19"
## 
## [[9]]
## [1] "10" "22" "19"
## 
## [[10]]
## [1] "10" "23" "19"
## 
## [[11]]
## [1] "10" "24" "19"
## 
## [[12]]
## [1] "10" "25" "19"
## 
## [[13]]
## [1] "10" "26" "19"
## 
## [[14]]
## [1] "10" "27" "19"
## 
## [[15]]
## [1] "10" "28" "19"
## 
## [[16]]
## [1] "10" "29" "19"
## 
## [[17]]
## [1] "10" "30" "19"
## 
## [[18]]
## [1] "10" "31" "19"
## 
## [[19]]
## [1] "11" "1"  "19"
## 
## [[20]]
## [1] "11" "2"  "19"
## 
## [[21]]
## [1] "11" "3"  "19"
## 
## [[22]]
## [1] "11" "4"  "19"

Subsequently - we will apply a formula to re-calculate the Days After Stress.

In our example experiment, the first day of measurement (and also 1st day after stress) is 20th of October, and thus the calculations go as follow:

fc_reliable$delta_days <- sapply(fc_reliable$Measuring.Date, function(x){
  x= as.numeric(x)
  # in here please replace month number (10) of your first measurement in the the first part of the equation 
  # and the day of the month (20) in the second part of the equation
  (x[1]-10)*30+x[2]-20
})

head(fc_reliable)

# make sure that your delta_days column is seen as numeric by executing the following:
fc_reliable$delta_days <- as.numeric(fc_reliable$delta_days)

color_data$delta_days <- sapply(color_data$Measuring.Date, function(x){
  x= as.numeric(x)
  # in here please replace month number (10) of your first measurement in the the first part of the equation 
  # and the day of the month (20) in the second part of the equation
  (x[1]-10)*30+x[2]-20
})

# make sure that your delta_days column is seen as numeric by executing the following:
color_data$delta_days <- as.numeric(color_data$delta_days)

morpho_data$delta_days <- sapply(morpho_data$Measuring.Date, function(x){
  x= as.numeric(x)
  # in here please replace month number (10) of your first measurement in the the first part of the equation 
  # and the day of the month (20) in the second part of the equation
  (x[1]-10)*30+x[2]-20
})

# make sure that your delta_days column is seen as numeric by executing the following:
morpho_data$delta_days <- as.numeric(morpho_data$delta_days)

And then - the problem is that we have different periods measured - one in the morning and one at night. So for this we need to do a smilar approach to what we did with the date

fc_reliable$Measuring.Time <- as.character(fc_reliable$Measuring.Time)
fc_reliable$Measuring.Time <- strsplit(fc_reliable$Measuring.Time, ":")

fc_reliable$measuring_h <- sapply(fc_reliable$Measuring.Time, function(x){
  x= as.numeric(x)
  x[1]
})
fc_reliable$measuring_h <- as.numeric(fc_reliable$measuring_h)
fc_day_data <- subset(fc_reliable, fc_reliable$measuring_h > 9 & fc_reliable$measuring_h < 15)
fc_night_data <- subset(fc_reliable, fc_reliable$measuring_h < 9 | fc_reliable$measuring_h > 15)

color_data$Measuring.Time <- as.character(color_data$Measuring.Time)
color_data$Measuring.Time <- strsplit(color_data$Measuring.Time, ":")

color_data$measuring_h <- sapply(color_data$Measuring.Time, function(x){
  x= as.numeric(x)
  x[1]
})
color_data$measuring_h <- as.numeric(color_data$measuring_h)
color_day_data <- subset(color_data, color_data$measuring_h > 9 & color_data$measuring_h < 15)
color_night_data <- subset(color_data, color_data$measuring_h < 9 | color_data$measuring_h > 15)

morpho_data$Measuring.Time <- as.character(morpho_data$Measuring.Time)
morpho_data$Measuring.Time <- strsplit(morpho_data$Measuring.Time, ":")

morpho_data$measuring_h <- sapply(morpho_data$Measuring.Time, function(x){
  x= as.numeric(x)
  x[1]
})
morpho_data$measuring_h <- as.numeric(morpho_data$measuring_h)
morpho_day_data <- subset(morpho_data, morpho_data$measuring_h > 9 & morpho_data$measuring_h < 15)
morpho_night_data <- subset(morpho_data, morpho_data$measuring_h < 9 | morpho_data$measuring_h > 15)

Combining data into final files

First let’s get rid of “measuring day” and “measuring time” columns for each of the datasets:

colnames(morpho_day_data)

##  [1] "Measuring.Date"  "Measuring.Time"  "Experiment.ID"  
##  [4] "Round.Order"     "Tray.ID"         "Tray.Info"      
##  [7] "Plant.ID"        "Position"        "Plant.Name"     
## [10] "Plant.Info"      "PID"             "Camera.Position"
## [13] "AREA_PX"         "AREA_MM"         "PERIMETER_PX"   
## [16] "PERIMETER_MM"    "ROUNDNESS"       "ROUNDNESS2"     
## [19] "ISOTROPY"        "COMPACTNESS"     "ECCENTRICITY"   
## [22] "RMS"             "SOL"             "delta_days"     
## [25] "measuring_h"

morpho_day <- morpho_day_data[,c(5:10,24,13:23)]
morpho_night <- morpho_night_data[,c(5:10,24,13:23)]
head(morpho_day)

colnames(color_day_data)

##  [1] "Measuring.Date"  "Measuring.Time"  "Experiment.ID"  
##  [4] "Round.Order"     "Tray.ID"         "Tray.Info"      
##  [7] "Plant.ID"        "Position"        "Plant.Name"     
## [10] "Plant.Info"      "PID"             "Camera.Position"
## [13] "RGB1"            "RGB2"            "RGB3"           
## [16] "RGB4"            "RGB5"            "RGB6"           
## [19] "RGB7"            "RGB8"            "RGB9"           
## [22] "delta_days"      "measuring_h"

color_day <- color_day_data[,c(5:10,22,13:21)]
color_night <- color_night_data[,c(5:10,22,13:21)]
head(color_day)

colnames(fc_day_data)

##  [1] "Tray.ID"        "Position"       "Measuring.Date" "Measuring.Time"
##  [5] "Tray.Info"      "Plant.ID"       "Plant.Info"     "Size"          
##  [9] "Fo"             "Fm"             "Fm_Lss1"        "Fm_Lss2"       
## [13] "Fm_Lss3"        "Fm_Lss4"        "Fm_Lss5"        "Fm_Lss6"       
## [17] "Ft_Lss1"        "Ft_Lss2"        "Ft_Lss3"        "Ft_Lss4"       
## [21] "Ft_Lss5"        "Ft_Lss6"        "Fo_Lss1"        "Fo_Lss2"       
## [25] "Fo_Lss3"        "Fo_Lss4"        "Fo_Lss5"        "Fo_Lss6"       
## [29] "Fv"             "Fv_Lss1"        "Fv_Lss2"        "Fv_Lss3"       
## [33] "Fv_Lss4"        "Fv_Lss5"        "Fv_Lss6"        "qP_Lss1"       
## [37] "qP_Lss2"        "qP_Lss3"        "qP_Lss4"        "qP_Lss5"       
## [41] "qP_Lss6"        "QY_max"         "QY_Lss1"        "QY_Lss2"       
## [45] "QY_Lss3"        "QY_Lss4"        "QY_Lss5"        "QY_Lss6"       
## [49] "FvFm_Lss1"      "FvFm_Lss2"      "FvFm_Lss3"      "FvFm_Lss4"     
## [53] "FvFm_Lss5"      "FvFm_Lss6"      "NPQ_Lss1"       "NPQ_Lss2"      
## [57] "NPQ_Lss3"       "NPQ_Lss4"       "NPQ_Lss5"       "NPQ_Lss6"      
## [61] "Fq_Lss1"        "Fq_Lss2"        "Fq_Lss3"        "Fq_Lss4"       
## [65] "Fq_Lss5"        "Fq_Lss6"        "delta_days"     "measuring_h"

fc_day <- fc_day_data[,c(5:7,67,9:66)]
fc_night <- fc_night_data[,c(5:7,67,9:66)]
head(fc_day)

all_day <- merge(morpho_day, color_day, by=c("Tray.ID","Tray.Info", "Plant.ID", "Position",  "Plant.Name", "Plant.Info", "delta_days"))
all_night <- merge(morpho_night, color_night, by=c("Tray.ID","Tray.Info", "Plant.ID", "Position",  "Plant.Name", "Plant.Info", "delta_days"))

all_day <- merge(all_day, fc_day, by=c("Tray.Info", "Plant.ID", "Plant.Info", "delta_days"))
all_night <- merge(all_night, fc_night, by=c("Tray.Info", "Plant.ID", "Plant.Info", "delta_days"))
head(morpho_data)

# Remove rows containing missing data:
all_day_nona <- na.omit(all_day)
all_night_nona <- na.omit(all_night)
head(all_day_nona)

max(all_day_nona$QY_max)

## [1] 0.8665241

max(all_day_nona$FvFm_Lss1)

## [1] 0.864046

max(all_day_nona$qP_Lss1)

## [1] 0.9999984

max(all_day_nona$NPQ_Lss1)

## [1] 0.08774152

Further selection for samples with all days measured

As I am getting a bit confused by now which collumns contain data and which don;t - I want to make a bit of a clean-up. So let’s look at column names to select the traits that I want to keep and put them in right order:

colnames(all_day_nona)[1] <- "Treatment"
colnames(all_day_nona)[3] <- "Genotype"
colnames(all_day_nona)[4] <- "days"
head(all_day_nona)

colnames(all_night_nona)[1] <- "Treatment"
colnames(all_night_nona)[3] <- "Genotype"
colnames(all_night_nona)[4] <- "days"
head(all_night_nona)

Visual removal of samples based on Area

library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

Area_lgraph <- ggplot(data=all_day_nona, aes(x= days, y=AREA_MM, group = Plant.ID)) 
Area_lgraph <- Area_lgraph + geom_line() 
#Area_lgraph <- Area_lgraph + ylim (0, 2000)
# geom_smooth(method="lm", aes(color="Exp Model"), formula= (y ~ exp(x)), se=FALSE, linetype = 1) +
Area_lgraph <- Area_lgraph + facet_grid(~ Treatment) 
Area_lgraph <- Area_lgraph + ylab(expression(paste("Area (", mm^2, ")", sep = ""))) + xlab("Days After Stress") + theme(legend.position='none')
Area_lgraph

ggplotly(Area_lgraph)

## Warning: The `printer` argument is deprecated as of rlang 0.3.0.
## This warning is displayed once per session.

# Outlier curation:

all_genotypes <- unique(all_day_nona$Genotype)
all_day_nona$days <- as.factor(all_day_nona$days)
length(all_genotypes)

## [1] 27

head(all_day_nona)

unique(all_day_nona$Treatment)

## [1] Control Salt   
## Levels: Control Salt

all_genotypes

##  [1] M7   M1   M5   M9   M3   M8   M2   M6   Col0 M4   M22  M26  M20  M24 
## [15] M23  M27  M21  M25  M19  M15  M13  M14  M17  M18  M12  M16  M11 
## 28 Levels: Col0 M1 M10 M11 M12 M13 M14 M15 M16 M17 M18 M19 M2 M20 ... M9

# There are 27 genotypes:

# because I am lazy - let's loop all the genotypes:

for(i in 1:27){
  temp_data <- subset(all_day_nona, all_day_nona$Genotype == all_genotypes[i]) 
  Area_lgraph <- ggplot(data=temp_data, aes(x= days, y=AREA_MM, group = Plant.ID, color = Treatment)) 
  Area_lgraph <- Area_lgraph + geom_line(alpha=0.5) 
  Area_lgraph <- Area_lgraph + stat_summary(fun.y = mean, aes(group=Treatment), size=1.5, geom="line", linetype="dashed")
  Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Treatment), alpha=0.3)
  Area_lgraph <- Area_lgraph + facet_grid(~ Treatment) + scale_color_manual(values=c("turquoise3", "maroon3"))
  Area_lgraph <- Area_lgraph + labs(title = as.character(all_genotypes[i])) + ylim(0,1700)
  Area_lgraph <- Area_lgraph + ylab(expression(paste("Area (", mm^2, ")", sep = ""))) + xlab("time (days)") + theme(legend.position='none')
  Area_lgraph
  pdf(paste(all_genotypes[i],"_Area_raw_data.pdf"), width=20, height = 10) 
  plot(Area_lgraph)
  dev.off()
}

## Warning: Removed 3 rows containing non-finite values (stat_summary).

## Warning: Removed 3 rows containing non-finite values (stat_summary).

## Warning: Removed 3 rows containing missing values (geom_path).

## Warning: Removed 12 rows containing non-finite values (stat_summary).

## Warning: Removed 12 rows containing non-finite values (stat_summary).

## Warning: Removed 10 rows containing missing values (geom_path).

## Warning: Removed 1 rows containing non-finite values (stat_summary).

## Warning: Removed 1 rows containing non-finite values (stat_summary).

## Warning: Removed 1 rows containing missing values (geom_path).

## Warning: Removed 7 rows containing non-finite values (stat_summary).

## Warning: Removed 7 rows containing non-finite values (stat_summary).

## Warning: Removed 3 rows containing missing values (geom_path).

## Warning: Removed 3 rows containing non-finite values (stat_summary).

## Warning: Removed 3 rows containing non-finite values (stat_summary).

## Warning: Removed 3 rows containing missing values (geom_path).

## Warning: Removed 4 rows containing non-finite values (stat_summary).

## Warning: Removed 4 rows containing non-finite values (stat_summary).

## Warning: Removed 4 rows containing missing values (geom_path).

## Warning: Removed 71 rows containing non-finite values (stat_summary).

## Warning: Removed 71 rows containing non-finite values (stat_summary).

## Warning: Removed 61 rows containing missing values (geom_path).

## Warning: Removed 10 rows containing non-finite values (stat_summary).

## Warning: Removed 10 rows containing non-finite values (stat_summary).

## Warning: Removed 10 rows containing missing values (geom_path).

## Warning: Removed 67 rows containing non-finite values (stat_summary).

## Warning: Removed 67 rows containing non-finite values (stat_summary).

## Warning: Removed 63 rows containing missing values (geom_path).

## Warning: Removed 34 rows containing non-finite values (stat_summary).

## Warning: Removed 34 rows containing non-finite values (stat_summary).

## Warning: Removed 30 rows containing missing values (geom_path).

## Warning: Removed 44 rows containing non-finite values (stat_summary).

## Warning: Removed 44 rows containing non-finite values (stat_summary).

## Warning: Removed 34 rows containing missing values (geom_path).

## Warning: Removed 75 rows containing non-finite values (stat_summary).

## Warning: Removed 75 rows containing non-finite values (stat_summary).

## Warning: Removed 75 rows containing missing values (geom_path).

## Warning: Removed 47 rows containing non-finite values (stat_summary).

## Warning: Removed 47 rows containing non-finite values (stat_summary).

## Warning: Removed 41 rows containing missing values (geom_path).

## Warning: Removed 36 rows containing non-finite values (stat_summary).

## Warning: Removed 36 rows containing non-finite values (stat_summary).

## Warning: Removed 30 rows containing missing values (geom_path).

## Warning: Removed 20 rows containing non-finite values (stat_summary).

## Warning: Removed 20 rows containing non-finite values (stat_summary).

## Warning: Removed 20 rows containing missing values (geom_path).

## Warning: Removed 35 rows containing non-finite values (stat_summary).

## Warning: Removed 35 rows containing non-finite values (stat_summary).

## Warning: Removed 27 rows containing missing values (geom_path).

## Warning: Removed 28 rows containing non-finite values (stat_summary).

## Warning: Removed 28 rows containing non-finite values (stat_summary).

## Warning: Removed 20 rows containing missing values (geom_path).

## Warning: Removed 4 rows containing non-finite values (stat_summary).

## Warning: Removed 4 rows containing non-finite values (stat_summary).

## Warning: Removed 4 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing non-finite values (stat_summary).

## Warning: Removed 2 rows containing non-finite values (stat_summary).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 5 rows containing non-finite values (stat_summary).

## Warning: Removed 5 rows containing non-finite values (stat_summary).

## Warning: Removed 5 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing non-finite values (stat_summary).

## Warning: Removed 2 rows containing non-finite values (stat_summary).

## Warning: Removed 2 rows containing missing values (geom_path).

## Warning: Removed 5 rows containing non-finite values (stat_summary).

## Warning: Removed 5 rows containing non-finite values (stat_summary).

## Warning: Removed 5 rows containing missing values (geom_path).

## Warning: Removed 18 rows containing non-finite values (stat_summary).

## Warning: Removed 18 rows containing non-finite values (stat_summary).

## Warning: Removed 15 rows containing missing values (geom_path).

## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to
## adjust the group aesthetic?

# Fill in the remove_pls list the Plant.ID of the samples that you thought look suspicious or downright wrong - and then you can remove them from both day & night datasets by un-hashing the following commands:

 # remove_pls <- c("PS_Tray_023 _ D5", "PS_Tray_115 _ D1", "PS_Tray_006 _ C1", "PS_Tray_023 _ A2", "PS_Tray_115 _ C4", 
 #                 "PS_Tray_047 _ D4", "PS_Tray_072 _ D1", "PS_Tray_063 _ C2", "PS_Tray_063 _ A1", "PS_Tray_094 _ C4",
 #                 "PS_Tray_045 _ B2", "PS_Tray_054 _ B2", "PS_Tray_092 _ D2", "PS_Tray_010 _ A4", "PS_Tray_068 _ D2",
 #                 "PS_Tray_100 _ A2", "PS_Tray_102 _ A4", "PS_Tray_010 _ B3")
  
  # head(all_data_complete)
  
  # all_data_clean <- subset(all_data_complete, !(all_data_complete$PlantID %in% remove_pls))

  # dim(all_data_clean)  
  # dim(all_data_complete)  
  
  # Area_lgraph <- ggplot(data=all_data_complete, aes(x= days, y=AREA_MM, group = PlantID, color = Tray.Info)) 
  # Area_lgraph <- Area_lgraph + geom_line(alpha=0.5) 
  # Area_lgraph <- Area_lgraph + stat_summary(fun.y = mean, aes(group=Tray.Info), size=1.5, geom="line", linetype="dashed")
  # Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Tray.Info), alpha=0.3)
  # Area_lgraph <- Area_lgraph + facet_grid(~ Tray.Info) + ylim(0,1700)
  # Area_lgraph <- Area_lgraph + labs(title = "Entire experiment") + scale_color_manual(values=c("turquoise3", "maroon3"))
  # Area_lgraph <- Area_lgraph + ylab("Area (mm2)") + xlab("time (days)") + theme(legend.position='none')
  # pdf("Entire_exp_Area_complete_data.pdf", width=20, height = 10)
  # plot(Area_lgraph)
  # dev.off()
  
  # Area_lgraph <- ggplot(data=all_data_clean, aes(x= days, y=AREA_MM, group = PlantID, color = Tray.Info)) 
  # Area_lgraph <- Area_lgraph + geom_line(alpha=0.5) 
  # Area_lgraph <- Area_lgraph + stat_summary(fun.y = mean, aes(group=Tray.Info), size=1.5, geom="line", linetype="dashed")
  # Area_lgraph <- Area_lgraph + stat_summary(fun.data = mean_se, geom="ribbon", linetype=0, aes(group=Tray.Info), alpha=0.3)
  # Area_lgraph <- Area_lgraph + facet_grid(~ Tray.Info) + ylim(0,1700)
  # Area_lgraph <- Area_lgraph + labs(title = "Entire experiment") + scale_color_manual(values=c("turquoise3", "maroon3"))
  # Area_lgraph <- Area_lgraph + ylab("Area (mm2)") + xlab("time (days)") + theme(legend.position='none')
  # pdf("Entire_exp_Area_clean_data.pdf", width=20, height = 10)
  # plot(Area_lgraph)
  # dev.off()

# Calculating the growth factors

Let’s calculate the exponential growth factors to fit into the observed increase in rosette area:

First - let’s make an empty table that will contain all the values that we will calculate:

names <- c(text="Plant.ID", "Genotype", "Treatment", "intercept", "delta")
growth_factors <- data.frame()

for (k in names) growth_factors[[k]] <- as.character()


i=1

uni <- subset(all_day_nona, all_day_nona$Plant.ID == unique(all_day_nona$Plant.ID)[i])
uni

# let's get also all the individual identifiers in the table:
growth_factors[i,1] <- as.character(unique(uni$Plant.ID))
growth_factors[i,2] <- as.character(unique(uni$Genotype))
growth_factors[i,3] <- as.character(unique(uni$Treatment))

# let's calculate the model:  
Area_mm2 <- uni$AREA_MM
time_d <- uni$days
model_C <- lm(log(Area_mm2)~ time_d)
# add model parts into the main table with growth factors
growth_factors[i,4] <- as.numeric(model_C$coefficients[[1]])
growth_factors[i,5] <- as.numeric(model_C$coefficients[[2]])

# calculate predicted Area values for this specific sample:
timevalues <- unique(time_d)
timevalues

##  [1] -1 -2 -3 -4 -5 -6 0  1  10 11 12 13 2  3  4  5  6  7  8  9 
## Levels: -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13

Area.pred <- exp(predict(model_C,list(Time=timevalues)))
Area.pred

##           1           2           3           4           5           6 
##   36.976289   25.897148   18.804299   11.903891    8.220008    5.663283 
##           7           8           9          10          11          12 
##   52.041721   60.976512 1020.078213 1192.177427 1192.177427 1192.177427 
##          13          14          15          16          17          18 
## 1192.177427 1192.177427 1192.177427 1192.177427 1192.177427 1301.070517 
##          19          20          21          22          23          24 
## 1359.462812   98.255209  153.843355  198.379851  268.731021  356.594381 
##          25          26          27 
##  472.691680  666.562899  822.330670

uni$Area_pred <- Area.pred
done <- uni

done

growth_factors

# We have in total 621 plants, so let's loop it for all the remaining ones:
for(i in 2:255){
  uni <- subset(all_day_nona, all_day_nona$Plant.ID == unique(all_day_nona$Plant.ID)[i])
  growth_factors[i,1] <- as.character(unique(uni$Plant.ID))
  growth_factors[i,2] <- as.character(unique(uni$Genotype))
  growth_factors[i,3] <- as.character(unique(uni$Treatment))
  
  # let's calculate the model:  
  Area_mm2 <- uni$AREA_MM
  time_d <- uni$days
  model_C <- lm(log(Area_mm2)~ time_d)
  # add model parts into the main table with growth factors
  growth_factors[i,4] <- as.numeric(model_C$coefficients[[1]])
  growth_factors[i,5] <- as.numeric(model_C$coefficients[[2]])
  
  # calculate predicted Area values for this specific sample:
  timevalues <- unique(time_d)
  timevalues
  
  Area.pred <- exp(predict(model_C,list(Time=timevalues)))
  
  uni$Area_pred <- Area.pred
  done <- rbind(done, uni)
}

# Something is off with ID 256 - so let's finish it for the other subset

for(i in 257:621){
  uni <- subset(all_day_nona, all_day_nona$Plant.ID == unique(all_day_nona$Plant.ID)[i])
  growth_factors[i,1] <- as.character(unique(uni$Plant.ID))
  growth_factors[i,2] <- as.character(unique(uni$Genotype))
  growth_factors[i,3] <- as.character(unique(uni$Treatment))
  
  # let's calculate the model:  
  Area_mm2 <- uni$AREA_MM
  time_d <- uni$days
  model_C <- lm(log(Area_mm2)~ time_d)
  # add model parts into the main table with growth factors
  growth_factors[i,4] <- as.numeric(model_C$coefficients[[1]])
  growth_factors[i,5] <- as.numeric(model_C$coefficients[[2]])
  
  # calculate predicted Area values for this specific sample:
  timevalues <- unique(time_d)
  timevalues
  
  Area.pred <- exp(predict(model_C,list(Time=timevalues)))
  
  uni$Area_pred <- Area.pred
  done <- rbind(done, uni)
}

head(growth_factors)

dim(growth_factors)

## [1] 621   5

calculating the SIIT and Visualizing growth factors:

Very often - we would like to examine the relative performance of the plants under salt stress - relative how the genotype grows under control conditions. To do this, we can calculate the Salt Tolerance Index (or for that matter - ANY tolerance index).

We can easily do it automatically by calculating the average growth rate under control condition for each Genotype, match it with plants belonging to the same genotype under salt stress condition, and calculating the performance index:

growth_factors$intercept <- as.numeric(growth_factors$intercept)
growth_factors$delta <- as.numeric(growth_factors$delta)

# Calculate average growth rate for each genotype:

#install.packages("doBy")
library(doBy)

growth_sum <- summaryBy(data = growth_factors, delta ~ Genotype + Treatment)
head(growth_sum)

growth_C <- subset(growth_sum, growth_sum$Treatment == "Control")
head(growth_C)

growth_C <- growth_C[,c(1,3)]
growth_factors2 <- merge(growth_factors, growth_C, by="Genotype", all=T)
unique(growth_factors2$Treatment)

## [1] "Salt"    "Control" NA

head(growth_factors2)

colnames(growth_factors2)[6] <- "Delta_Control.mean"

growth_factors2$SIIT <- growth_factors2$delta / growth_factors2$Delta_Control.mean

head(growth_factors2)

write.csv(growth_factors2, "Clean_Growth_factors_and_SIIT.csv", row.names = F)

So now we can have a look at plant’s performance under salt stress. If we visualize everything all together…

library(ggsci)
library(ggpubr)

## Loading required package: magrittr

library(ggbeeswarm)
library(gapminder)
library(RColorBrewer)
library(ggridges)

## 
## Attaching package: 'ggridges'

## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual

library(cowplot)

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggpubr':
## 
##     get_legend

## The following object is masked from 'package:ggplot2':
## 
##     ggsave

growth_factors2 <- subset(growth_factors2, growth_factors2$Treatment == c("Control", "Salt"))

## Warning in growth_factors2$Treatment == c("Control", "Salt"): longer object
## length is not a multiple of shorter object length

my_box_plot <- ggplot(data = growth_factors2, mapping = aes(x = Genotype, y = delta, colour = Genotype)) 
#my_box_plot <- my_box_plot + geom_boxplot()
my_box_plot <- my_box_plot + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot <- my_box_plot + facet_wrap(~ Treatment)
my_box_plot <- my_box_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
my_box_plot <- my_box_plot + theme(legend.position = "none") + ylim(0.2, 0.4)
my_box_plot <- my_box_plot + xlab("") + ylab("RGR") # + scale_colour_manual(values=c("steelblue", "sienna3", "slateblue3", "slateblue3","slateblue3","slateblue3","slateblue3","slateblue3", "firebrick3", "deeppink4"))
my_box_plot <- my_box_plot + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot <- my_box_plot + stat_compare_means(label = "p.signif", method = "t.test", ref.group = "Col0") 
my_box_plot

## Warning: Removed 126 rows containing non-finite values (stat_summary).

## Warning: Removed 126 rows containing non-finite values
## (stat_compare_means).

## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations

## Warning: Removed 62 rows containing missing values (position_beeswarm).

## Warning: Removed 64 rows containing missing values (position_beeswarm).

# Save the graph with these few commands
pdf("all_growth_clean.pdf", width=20, height = 10)
plot(my_box_plot)

## Warning: Removed 126 rows containing non-finite values (stat_summary).

## Warning: Removed 126 rows containing non-finite values
## (stat_compare_means).

## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations

## Warning: Removed 62 rows containing missing values (position_beeswarm).

## Warning: Removed 64 rows containing missing values (position_beeswarm).

dev.off()

## quartz_off_screen 
##                 2

As you see - the above graph is a mess - so let’s have a look at the subset of the data

# Let's divide per groups of individual mutants that need to be analyzed separately:

unique(growth_factors2$Genotype)

##  [1] "Col0" "M1"   "M12"  "M13"  "M14"  "M15"  "M16"  "M17"  "M18"  "M19" 
## [11] "M2"   "M20"  "M21"  "M22"  "M23"  "M24"  "M25"  "M26"  "M27"  "M3"  
## [21] "M4"   "M5"   "M6"   "M7"   "M8"   "M9"

sub1 <- c("Col0", "M12", "M13")

sub1_growth_data <- subset(growth_factors2, growth_factors2$Genotype %in% sub1)

my_box_plot <- ggplot(data = sub1_growth_data, mapping = aes(x = Genotype, y = SIIT, colour = Genotype)) 
#my_box_plot <- my_box_plot + geom_boxplot()
my_box_plot <- my_box_plot + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot <- my_box_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
my_box_plot <- my_box_plot + theme(legend.position = "none")
my_box_plot <- my_box_plot + xlab("") + ylab("faction of RGR at Control")
my_box_plot <- my_box_plot + scale_colour_manual(values=c("steelblue", "sienna3", "deeppink4"))
my_box_plot <- my_box_plot + theme(axis.text.x = element_text(angle=90, vjust=0.5))
my_box_plot <- my_box_plot + stat_compare_means(label = "p.signif", method = "t.test", ref.group = "Col0") 
my_box_plot

pdf("Subset1_growth_SIIT.pdf", width=20, height = 10)
plot(my_box_plot)
dev.off()

## quartz_off_screen 
##                 2

my_box_plot <- ggplot(data = sub1_growth_data, mapping = aes(x = Genotype, y = delta, colour = Genotype)) 
#my_box_plot <- my_box_plot + geom_boxplot()
my_box_plot <- my_box_plot + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot <- my_box_plot + facet_wrap(~ Treatment)
my_box_plot <- my_box_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
my_box_plot <- my_box_plot + theme(legend.position = "none")
my_box_plot <- my_box_plot + xlab("") + ylab("RGR") + scale_colour_manual(values=c("steelblue", "sienna3", "deeppink4"))
my_box_plot <- my_box_plot + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot <- my_box_plot + stat_compare_means(label = "p.signif", method = "t.test", ref.group = "Col0") 
my_box_plot

pdf("Subset1_growth_clean.pdf", width=20, height = 10)
plot(my_box_plot)
dev.off()

## quartz_off_screen 
##                 2

visualizing timeseries of other parameters:

If you would like to visualize changes over time in other parameters - including Area, morphology, Fc, you can run the following timeseries graph, which will test e.g. the difference between your treatment vs. control across the genotypes of interest.

So first prepare your data

all_day_nona_sub1 <- subset(all_day_nona, all_day_nona$Genotype %in% sub1)

head(all_day_nona_sub1)

SOL_graph <- ggplot(data=all_day_nona_sub1, aes(x= days, y=AREA_MM, group = Plant.ID, color = Treatment)) 
SOL_graph <- SOL_graph + geom_line(alpha = 0.3) + scale_colour_manual(values = c("dodgerblue3", "tomato1")) 
SOL_graph <- SOL_graph + stat_summary(fun.y=mean, aes(group= Treatment),  size=0.7, geom="line", linetype = "dashed")
SOL_graph <- SOL_graph + facet_wrap(~ Genotype, ncol = 1)
SOL_graph <- SOL_graph + stat_summary(geom = "ribbon", linetype=0, fun.data = mean_cl_normal, aes(group= Treatment), alpha = 0.2)
SOL_graph <- SOL_graph + theme(legend.position="none")+ labs(color = "Treatment")
SOL_graph <- SOL_graph + ylab("Slenderness of leaves (a.u.)") + xlab("Days After Stress") 
SOL_graph <- SOL_graph + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = T)
SOL_graph

for other types of graphs and ways to analyze the data - please see the other PSI tutorial

Data Analysis - Plant Phenotyping workshop 13th of November 2019