SPIRO Hormone mutants

This is a pipeline describing the general approach for analyzing the SPIRO data from Jade’s experiment on hormone mutants and their reaction to NAT:

Load data

I m going to load the data from each SPIRO and add a collumn with the treatment of each plate - since I dont have it coded within my root names:

list.files()

##  [1] "E01J01P01.csv"                                          
##  [2] "E01J01P02.csv"                                          
##  [3] "E01J01P03.csv"                                          
##  [4] "E01J01P04.csv"                                          
##  [5] "E01J02P01.csv"                                          
##  [6] "E01J02P02.csv"                                          
##  [7] "E01J02P03.csv"                                          
##  [8] "E01J02P04.csv"                                          
##  [9] "Figure.Experiment01.pdf"                                
## [10] "Jade_NAT_Experiment01.nb.html"                          
## [11] "Jade_NAT_Experiment01.Rmd"                              
## [12] "NAT_effect_Experiment01_btw_treatments_per_genotype.pdf"

J1P1 <- read.csv("E01J01P01.csv")
J1P1$treatment <- "NAT"
J1P2 <- read.csv("E01J01P02.csv")
J1P2$treatment <- "NAT"
J1P3 <- read.csv("E01J01P03.csv")
J1P3$treatment <- "MS"
J1P4 <- read.csv("E01J01P04.csv")
J1P4$treatment <- "MS"

J2P1 <- read.csv("E01J02P01.csv")
J2P1$treatment <- "MS"
J2P2 <- read.csv("E01J02P02.csv")
J2P2$treatment <- "NAT"
J2P3 <- read.csv("E01J02P03.csv")
J2P3$treatment <- "MS"
J2P4 <- read.csv("E01J02P04.csv")
J2P4$treatment <- "NAT"

Next - I want to mmerge all of my experimental data into one dataset:

data <-  rbind(J1P1, J1P2, J1P3, J1P4, J2P1, J2P2, J2P3, J2P4)
data

Calculating time

cool - now let’s extract from image name the date and time so we can calculate hours of experiment:

strsplit(data$image[1], "-")

## [[1]]
## [1] "plate1"   "20240618" "214018"   "day.rsml"

strsplit(data$image[1], "-")[[1]][2]

## [1] "20240618"

strsplit(data$image[1], "-")[[1]][3]

## [1] "214018"

date is going to be the 2nd item, while time is going to be the third one - let’s save them into their own column.

Since our experiment does not go over change of month - we are going to be just OK extracting the day from the date and hour from the time stamp

# for day
substr(strsplit(data$image[1], "-")[[1]][2],7,8)

## [1] "18"

# for hour
substr(strsplit(data$image[1], "-")[[1]][3],1,2)

## [1] "21"

OK - now let’s loop it throughout the entire data:

for(i in 1:nrow(data)){
  data$day[i] <- substr(strsplit(data$image[i], "-")[[1]][2],7,8)
  data$hour[i] <- substr(strsplit(data$image[i], "-")[[1]][3],1,2)
}

data

awesome! Now - let’s extract only the columns we are going to use (length of the root), since our roots do not produce any lateral roots throughout the experiment:

colnames(data)

##  [1] "image"                 "root_name"             "root"                 
##  [4] "length"                "surface"               "volume"               
##  [7] "diameter"              "root_order"            "root_ontology"        
## [10] "parent_name"           "parent"                "insertion_position"   
## [13] "insertion_angle"       "n_child"               "child_density"        
## [16] "first_child"           "insertion_first_child" "last_child"           
## [19] "insertion_last_child"  "treatment"             "day"                  
## [22] "hour"

data

data2 <- data[,c(20:22,1:4)]
data2

ok - now let’s calculate time of experiment (in hours). This specific experiment was started on June 18th at 11:00 AM, and thus - TOE will be:

data2$TOE <- (as.numeric(as.character(data2$day)) - 18)*24 + (as.numeric(as.character(data2$hour)) - 11)
data2

OK - now - lets plot all of the different roots for their growth to see if the overall patterns are matching with our expectations:

library(ggplot2)

lgraph <- ggplot(data=data2, aes(x= TOE, y=length, group = root, color = treatment)) 
lgraph <- lgraph + geom_line(alpha = 0.7) 
lgraph <- lgraph + ylab("root legnth") + xlab("hours of imaging")
lgraph

OK looks OK - with couple of exceptions - but we will get rid of them later.

Splines and calculating growth rates:

First - let’s calculate splines for our plants, as well as the overall growth rate throughout this experiment for each genotype.

Let’s first isolate one plant and establish the calculations on one sample:

# Create a vector 'hours' containing values from 0 to 72 (3 days of imaging)
hours <- seq(0, 72, by = 1)
hours

##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [26] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## [51] 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72

temp <- subset(data2, data2$root == unique(data2$root)[1])
# Convert the 'TOE' column in 'temp' from a factor to numeric
temp$TOE <- as.numeric(as.character(temp$TOE))
temp <- temp[order(temp$TOE, decreasing = F),]


# Display the 'hours' column from the 'temp' data frame
temp$TOE

## [1]  0 10 19 34 43 58 72

# Fit a cubic smoothing spline to the 'TOE' and 'length' data in the 'temp' data frame
plot.spl <- with(temp, smooth.spline(TOE, length, df = 5))
# Create a scatterplot of 'Area.sum' against 'days' using data from 'temp'
plot(length ~ TOE, data = temp)

# Add a blue line to the plot based on the cubic smoothing spline ('plot.spl')
lines(plot.spl, col = "blue")

# Add a red line to the plot by predicting values using the cubic smoothing spline ('plot.spl')
lines(predict(plot.spl, hours), col = "red")

Now - let’s save all of these data into spline file:

# Define a vector 'names' containing column names
names <- c(text = "root", "genotype", "treatment", "TOE", "length")

# Create an empty data frame 'spline_data' to store the data
spline_data <- data.frame()

# Loop through each element 'k' in the 'names' vector
for (k in names) {
  # Create a new column in 'spline_data' with the name specified by 'k' and initialize it as character type
  spline_data[[k]] <- as.character()
}

spline_data

pred_temp <- predict(plot.spl, hours)

# Display the predictions stored in 'pred_temp'
pred_temp

## $x
##  [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [26] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## [51] 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 
## $y
##  [1] 1.051824 1.074113 1.096382 1.118610 1.140778 1.162865 1.184851 1.206715
##  [9] 1.228436 1.249996 1.271373 1.292550 1.313527 1.334305 1.354885 1.375270
## [17] 1.395462 1.415462 1.435273 1.454896 1.474337 1.493613 1.512743 1.531748
## [25] 1.550648 1.569464 1.588215 1.606921 1.625604 1.644282 1.662977 1.681708
## [33] 1.700495 1.719359 1.738320 1.757398 1.776606 1.795959 1.815472 1.835158
## [41] 1.855031 1.875106 1.895396 1.915916 1.936669 1.957624 1.978735 1.999961
## [49] 2.021259 2.042585 2.063896 2.085150 2.106303 2.127312 2.148135 2.168727
## [57] 2.189047 2.209051 2.228696 2.247951 2.266833 2.285371 2.303592 2.321527
## [65] 2.339204 2.356652 2.373899 2.390975 2.407907 2.424725 2.441458 2.458134
## [73] 2.474782

spline_data[1:73,4] <- pred_temp$x
spline_data[1:73,5] <- pred_temp$y
spline_data[1:73,1] <- temp$root[1]
spline_data[1:73,2] <- temp$root_name[1]
spline_data[1:73,3] <- temp$treatment[1]
spline_data

great - now let’s save it into another dataframe:

# Create a copy of 'spline_data' and store it in 'final_spline'
final_spline <- spline_data
# check how many unique plant IDs we have 
all_plants <- unique(data2$root)
length(all_plants)

## [1] 221

Cool - now lets calculate it for all of the plants:

for (i in 2:221) {
  temp <- subset(data2, data2$root == unique(data2$root)[i])

  # Check if 'temp' has more than 4 rows
  if (dim(temp)[1] > 4) {
    # Fit a cubic smoothing spline to 'days' and 'Area.sum' in 'temp'
    
    temp$TOE <- as.numeric(as.character(temp$TOE))
    temp <- temp[order(temp$TOE, decreasing = F),]
    
    plot.spl <- with(temp, smooth.spline(TOE, length, df = 5))

    # Generate predictions using the smoothing spline
    pred_temp <- predict(plot.spl, hours)

    # Update specific columns in 'spline_data' with 'pred_temp' and 'temp' values
    spline_data[1:73,4] <- pred_temp$x
    spline_data[1:73,5] <- pred_temp$y
    spline_data[1:73,1] <- temp$root[1]
    spline_data[1:73,2] <- temp$root_name[1]
    spline_data[1:73,3] <- temp$treatment[1]

    # Append 'spline_data' to 'final_spline'
    final_spline <- rbind(final_spline, spline_data)
  } else {
    # Set specific columns in 'spline_data' when 'temp' has <= 4 rows
    spline_data[1:73,4] <- hours
    spline_data[1:73,5] <- 0
    spline_data[1:73,1] <- temp$root[1]
    spline_data[1:73,2] <- temp$root_name[1]
    spline_data[1:73,3] <- temp$treatment[1]
  }
}

final_spline

Let’s do some small cleanup:

final_spline$TOE <- as.numeric(as.character(final_spline$TOE))

# there are some stray roots that were not assigned per genotype = let's remove them
bye <- c(" root_0", " root_2", " root_9")
final_spline <- subset(final_spline, !(final_spline$genotype %in% bye))
unique(final_spline$genotype)

##  [1] " 14" " wt" " 15" " 13" " 10" " 12" " 11" " 9"  " 3"  " 2"  " 4"  " 1" 
## [13] " 7"  " 8"  " 6"  " 5"

# Convert the 'length' column in 'final_spline' to numeric type
final_spline$length <- as.numeric(as.character(final_spline$length))

final_spline

this looks great - now let’s re-plot it with some additional stats:

library(ggsci)
library(ggpubr)

# Convert 'TOE' column to a factor with numeric levels
final_spline$TOE <- as.factor(as.numeric(as.character(final_spline$TOE)))

# Create a line graph (Area_lgraph) using ggplot2
Length_lgraph <- ggplot(data = final_spline, aes(x = TOE, y = length, group = root, color = treatment)) 
Length_lgraph <- Length_lgraph + geom_line(alpha = 0.1)
Length_lgraph <- Length_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = treatment), alpha = 0.3) 
Length_lgraph <- Length_lgraph + stat_summary(fun = mean, aes(group = treatment), size = 0.7, geom = "line", linetype = "dashed")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Length_lgraph <- Length_lgraph + stat_compare_means(aes(group = treatment), label = "p.signif", method = "t.test", hide.ns = T) 
Length_lgraph <- Length_lgraph + labs(x = "Hours of Imaging", y = "length (cm)") + scale_color_d3("category10") + theme(legend.position = "bottom")
Length_lgraph

OK - while this seems great - we still would like to know a little more about how different genotypes are behaving here. Let’s facet this graph per genotype:

# Let's order genotypes in the order that I d like to see them:
final_spline$genotype <- factor(final_spline$genotype, levels =c(" wt", " 1", " 2", " 3", " 4", " 5", " 6", " 7", " 8", " 9", " 10", " 11", " 12", " 13", " 14", " 15"))
final_spline$TOE <- as.numeric(final_spline$TOE)
Length_lgraph <- ggplot(data = final_spline, aes(x = TOE, y = length, group = root, color = treatment)) 
Length_lgraph <- Length_lgraph + geom_line(alpha = 0.1)
Length_lgraph <- Length_lgraph + facet_wrap(~ genotype)
Length_lgraph <- Length_lgraph + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = treatment), alpha = 0.3) 
Length_lgraph <- Length_lgraph + stat_summary(fun = mean, aes(group = treatment), size = 0.7, geom = "line", linetype = "dashed") 
#Length_lgraph <- Length_lgraph + stat_compare_means(aes(group = treatment), label = "p.signif", method = "t.test", hide.ns = T) 
Length_lgraph <- Length_lgraph + labs(x = "Hours of Imaging", y = "length (cm)") + scale_color_d3("category10") + theme(legend.position = "bottom")
Length_lgraph

Growth rate calculations:

additionally - we can calculate a linear growth rate by fitting a linear regression into the observed data:

temp <- subset(final_spline, final_spline$root == unique(final_spline$root)[1])
temp$length <- as.numeric(temp$length)
temp$TOE <- as.numeric(temp$TOE)
temp

plot(length ~ TOE, data = temp)

plot(temp$length ~ temp$TOE) +
# Linear regression line is added to the scatterplot using the lm() function
abline(lm(temp$length ~ temp$TOE))

## integer(0)

# Fit a linear regression model to predict 'length' based on 'hours' in the 'temp' data frame
model <- lm(temp$length ~ temp$TOE)
model

## 
## Call:
## lm(formula = temp$length ~ temp$TOE)
## 
## Coefficients:
## (Intercept)     temp$TOE  
##      1.0503       0.0198

# Extract the growth rate (GR) coefficient from the linear regression model
GR <- model$coefficients[2]
GR

##   temp$TOE 
## 0.01979515

# Calculate the coefficient of determination (R-squared) for the model
R2 <- summary(model)$r.squared
R2

## [1] 0.9996501

# Define vector 'names' containing column names for the 'growth_data' data frame
names <- c(text = "root", "genotype", "treatment", "GR", "R2")

# Create an empty data frame 'growth_data' to store growth-related data
growth_data <- data.frame()

# Loop through each element 'k' in the 'names' vector
for (k in names) {
  # Create a new column in 'growth_data' with the name specified by 'k' and set as character type
  growth_data[[k]] <- as.character()
}

# Assign the 'ID' value from the 'temp' data frame to the first row, first column of 'growth_data'
growth_data[1, 1] <- temp$root[1]

# Assign the 'Accession' value from the 'temp' data frame to the first row, second column of 'growth_data'
growth_data[1, 2] <- as.character(temp$genotype[1])

# Assign the 'Treatment' value from the 'temp' data frame to the first row, third column of 'growth_data'
growth_data[1, 3] <- temp$treatment[1]

# Assign the growth rate (GR) to the first row, fourth column of 'growth_data'
growth_data[1, 4] <- GR

# Assign the coefficient of determination (R-squared, R2) to the first row, fifth column of 'growth_data'
growth_data[1, 5] <- R2

growth_data

ok - now let’s calculate it for all:

# Create vector 'all_plants' containing unique 'ID' values from 'final_spline'
all_plants <- unique(final_spline$root)

# Loop through each unique 'ID' in 'all_plants'
for (i in 1:length(all_plants)) {
  # Subset 'final_spline' to select data for the current 'ID'
  temp <- subset(final_spline, final_spline$root == all_plants[i])
  temp$length <- as.numeric(temp$length)
  temp$TOE <- as.numeric(temp$TOE)

  # Fit a linear regression model to predict 'Area.SUM' based on 'day'
  model <- lm(temp$length ~ temp$TOE)

  # Extract the growth rate (GR) coefficient from the linear regression model
  GR <- model$coefficients[2]

  # Calculate the coefficient of determination (R-squared, R2) for the model
  R2 <- summary(model)$r.squared

  # Populate 'growth_data' with relevant information for the current 'ID'
  growth_data[i, 1] <- temp$root[1]
  growth_data[i, 2] <-  as.character(temp$genotype[1])
  growth_data[i, 3] <- temp$treatment[1]
  growth_data[i, 4] <- GR
  growth_data[i, 5] <- R2
}

growth_data

Let’s fo a little clean up:

# Convert the 'GR' and 'R2' columns in 'growth_data' to numeric
growth_data$GR <- as.numeric(as.character(growth_data$GR))
growth_data$R2 <- as.numeric(as.character(growth_data$R2))

growth_data_clean <- subset(growth_data, growth_data$R2 > 0.7)
growth_data_clean <- subset(growth_data_clean, growth_data_clean$GR > 0)
growth_data_clean

Great - now we have all of this data - let’s plot it:

# Let's order genotypes in the order that I d like to see them:
growth_data_clean$genotype <- factor(growth_data_clean$genotype, levels =c(" wt", " 1", " 2", " 3", " 4", " 5", " 6", " 7", " 8", " 9", " 10", " 11", " 12", " 13", " 14", " 15"))
growth_data_clean$treatment <- factor(growth_data_clean$treatment, levels =c("MS", "NAT"))


GR_overall <- ggerrorplot(growth_data_clean, x = "treatment", y = "GR", color = "treatment", fill = "treatment", facet.by = "genotype",
                          desc_stat = "mean_sd", add = "jitter", ncol = 8,
                          xlab="", ylab= "Growth Rate (cm / hour)", add.params = list(color = "darkgray")) + scale_color_d3("category10") 
GR_overall <- GR_overall + stat_compare_means(method = "aov", label.y = 0.04)
GR_overall <- GR_overall + rremove("legend")
GR_overall

OK - this is one way of seeing it - which is - do we see differences between treatments. Not much differences - but let’s save it as PDF for a better view.

pdf("NAT_effect_Experiment01_btw_treatments_per_genotype.pdf", width = 15, height = 10)
print(GR_overall)
dev.off()

## quartz_off_screen 
##                 2

OK - there are definitely few that show SIGNIFICANT differences between treatment and control. Which is COOL! <3

Now - let’s look at the differences between genotypes:

GR_overall02 <- ggerrorplot(growth_data_clean, x = "genotype", y = "GR", color = "genotype", fill = "genotype", facet.by = "treatment",
                          desc_stat = "mean_sd", add = "jitter", ncol = 8,
                          xlab="", ylab= "Growth Rate (cm / hour)", add.params = list(color = "darkgray")) 
GR_overall02 <- GR_overall02 + stat_compare_means(aes(label = after_stat(p.signif)), method = "t.test", ref.group = " wt", hide.ns = T)
GR_overall02 <- GR_overall02 + rremove("legend")
GR_overall02

now let’s save these figures into one nice figure:

library(cowplot)

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggpubr':
## 
##     get_legend

pdf("Figure.Experiment01.pdf", height = 25, width =  15)
plot_grid(Length_lgraph, GR_overall, GR_overall02, labels = "AUTO", ncol = 1)
dev.off()

## quartz_off_screen 
##                 2

SPIRO Hormone mutants - Jade NAT

Load data

Calculating time

Splines and calculating growth rates:

Growth rate calculations: