This notebook describes the analysis of the experiment performed by Aparna Srinivasan on tepary beans grown under control (60% soil water holding capacity) and two drought conditions (10% and 5% SWHC).

Load Libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.1.3
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.1.3
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(tinytex)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
library(wesanderson)
## Warning: package 'wesanderson' was built under R version 4.1.3
Set working directory
getwd()
## [1] "C:/Users/Aparna/Desktop/April_RPi_Analysis"
setwd("C:/Users/Aparna/Desktop/April_RPi_Analysis")
##### Load all days after stress ####
d0 <- read.csv ("single_value_trait_All/Results_single_value_traits_041322.csv")
d2 <- read.csv("single_value_trait_All/Results_single_value_traits_041522.csv")
d4 <-read.csv("single_value_trait_All/Results_single_value_traits_041722.csv")
d6 <-read.csv("single_value_trait_All/Results_single_value_traits_041922.csv")
d8 <-read.csv("single_value_trait_All/Results_single_value_traits_042122.csv")
d10 <-read.csv("single_value_trait_All/Results_single_value_traits_042322.csv")
d12 <- read.csv ("single_value_trait_All/Results_single_value_traits_042522.csv")
##### Data Cleaning####
head(d0)
##### Removing 1st 15 empty coloumns and adding 31st coloumn infront ####
d0 <- d0[,c(31, 16:30)]
d2 <- d2[,c(31, 16:30)]
d4 <- d4[,c(31, 16:30)]
d6 <- d6[,c(31, 16:30)]
d8 <- d8[,c(31, 16:30)]
d10 <- d10[,c(31, 16:30)]
d12 <- d12[,c(31, 16:30)]
##### Bind all days together ####
combining <- rbind(d0, d2, d4, d6, d8, d10, d12)
###### Split _ #####
combining$info <- strsplit(combining$roi, split = "_")
combining$info[1][[1]][1]
## [1] "2022.04.13-10.07.53"
### Side column and timestamp info isolated #####
for(i in 1:nrow(combining)){
  combining$side[i] <- combining$info[i][[1]][3]
 combining$timestamp[i] <- combining$info[i][[1]][1]
}
combining
##### Split the date and time and isolate both DATE and TIMESTAMP
combining$info2 <- strsplit(combining$timestamp, "-")
for(i in 1:nrow(combining)){
  combining$DATE[i] <- combining$info2[i][[1]][1]
  combining$timestamp[i] <- combining$info2[i][[1]][2]
}
combining
head(combining)
##### Selecting important variables for analysis####
sel_variables <- combining[,c(21,19,18,4,8,9)]
sel_variables
#### calculate cumulative area from 7 different angles AND maximum width and height ####
##### Subset for one plant ####
today <- subset(sel_variables, sel_variables$DATE == unique(sel_variables$DATE)[1])
one_plant <- subset(today, today$timestamp == unique(today$timestamp)[1])
one_plant
#### AREA.sum will be the sum of ALL the areas in this dataset while max width and height will be the width.max and height.max ####
sum(one_plant$area)
## [1] 42569
max(one_plant$width)
## [1] 305
max(one_plant$height)
## [1] 342
##### Adding measurement to an empty table ####
names <- c("DATE", "TIME", "Area.sum", "Height.max", "Width.max")
data_sum <- data.frame()
for (k in names) data_sum[[k]] <- as.character()
data_sum[1,1] <- one_plant$DATE[1]
data_sum[1,2] <- one_plant$timestamp[1]
data_sum[1,3] <- sum(one_plant$area)
data_sum[1,4] <- max(one_plant$height)
data_sum[1,5] <- sum(one_plant$width)
data_sum
##### Loop through all the data ####
count <- 1
for(d in 1:length(unique(sel_variables$DATE))){
  today <- subset(sel_variables, sel_variables$DATE == unique(sel_variables$DATE)[d])
  for(t in 1:length(unique(today$timestamp))){
    one_plant <- subset(today, today$timestamp == unique(today$timestamp)[t])
    data_sum[count,1] <- one_plant$DATE[1]
    data_sum[count,2] <- one_plant$timestamp[1]
    data_sum[count,3] <- sum(one_plant$area)
    data_sum[count,4] <- max(one_plant$height)
    data_sum[count,5] <- sum(one_plant$width)
    count <- count + 1
  }
}
##### Adding ####
dim(sel_variables)[1] / dim(data_sum)[1]
## [1] 7
##### Sel_variable data need to be checked ####
write.csv(data_sum, "SideviewImageAnalysis_Tepary_Cummulative_Check_April2022.csv", row.names = FALSE)
data_sum$Area.sum <- as.numeric(as.character(data_sum$Area.sum))
min(data_sum$Area.sum)
## [1] 8610
unique(data_sum$DATE)
## [1] "2022.04.13" "2022.04.15" "2022.04.17" "2022.04.19" "2022.04.21"
## [6] "2022.04.23" "2022.04.25"
data_sum$DATE <- gsub("2022.04.", "", data_sum$DATE)
data_sum$DATE <- as.numeric(as.character(data_sum$DATE))
data_sum
plot(data_sum$Area.sum ~ data_sum$DATE)

write.csv(data_sum, "data_sum.csv")
##### Final data with pot no, genotype and treatment, matched with processed image_loading ####
cumm_data <- read_excel("Image_alldays_confirmed_pot_April2022.xlsx")
head(cumm_data)
##### Calculate Replication Mean of genotypes ####
#drop rows with missing values
cumm_data_clean <- cumm_data %>% drop_na()
cumm_data_clean$Treatment <- factor(cumm_data_clean$Treatment, levels = c("Control","10% WHC", "5% WHC"))
##### Plot leaf area per genotype ####
la_gen <- ggplot(data = cumm_data_clean, aes(x= Days, y=Area.sum, color = Treatment)) + 
  geom_line(alpha = 0.3,size = 0.8, aes(group= PotNo)) + facet_wrap(~ Treatment, ncol=3) +
  xlab("Days After Stress") +  ylab("Average Leaf area across each rep per genotype") +
  theme_bw()

ggplotly(la_gen)
#remove outliers and plot again
cumm_data_clean2 <- subset(cumm_data_clean, cumm_data_clean$Days != 2)
cumm_data_clean2 <- subset(cumm_data_clean2, cumm_data_clean2$Days != 14)
cumm_data_clean2 <- subset(cumm_data_clean2, cumm_data_clean2$Days != 6)


la_gen2 <- ggplot(data = cumm_data_clean2, aes(x= Days, y=Area.sum, color = Treatment)) + 
  geom_line(alpha = 0.3,size = 0.8, aes(group= PotNo)) + facet_wrap(~ Treatment, ncol=3) +
  xlab("Days After Stress") +  ylab("Average Leaf area across each rep per genotype") +
  theme_bw()

ggplotly(la_gen2)
# calculate average leaf area across replication for each genotype
rep_mean <- cumm_data_clean %>% group_by(Genotype, Date, Treatment) %>% mutate(total.area.mean = ave(Area.sum))
rpi_avg <- unique(rep_mean[,c("Days","Genotype","Treatment", "total.area.mean")])
rpi_avg
write.csv(rpi_avg, "rpi_avg_sheet.csv")
##### Plot genotypes under three treatment ####

rpi_avg$Treatment <- factor(rpi_avg$Treatment, levels = c("Control","10% WHC", "5% WHC"))

Avg_la <- ggplot(rpi_avg, aes (x= Days,y= total.area.mean, color = Genotype))
Avg_la <- Avg_la + ggtitle("Effect of treatment on genotypes") + xlab("Days After Stress") + ylab("Average Leaf area across rep")
Avg_la <- Avg_la + geom_point(aes(col = Genotype), size=3) + facet_wrap(~ Treatment, ncol=3) + geom_line(alpha = 0.2,size = 1, aes(group= Genotype)) + facet_wrap(~ Treatment, ncol=3) + xlim(0,10)
Avg_la <- Avg_la + theme_stata(base_family = "Calibri", base_size = 10)  + theme(panel.grid.major.x = element_line(color = "gray", size = .5, linetype = "dashed"))
Avg_la
## Warning: Removed 9 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

##### Comparing between treatments ####
perc.10 <- c("Control", "10% WHC")
cumm_data_clean2_10 <- subset(cumm_data_clean2, cumm_data_clean2$Treatment %in% perc.10)

perc.05 <- c("Control", "5% WHC")
cumm_data_clean2_05 <- subset(cumm_data_clean2, cumm_data_clean2$Treatment %in% perc.05)
unique(cumm_data_clean2_05$Treatment)
## [1] Control 5% WHC 
## Levels: Control 10% WHC 5% WHC
cumm_data_clean2_10$Days <- as.factor(cumm_data_clean2_10$Days)
cumm_data_clean2_05$Days <- as.factor(cumm_data_clean2_05$Days)


c_d_10 <- ggplot(data = cumm_data_clean2_10, aes(x = Days, y = Area.sum, color = Treatment, group = PotNo)) + theme_classic()
c_d_10 <- c_d_10 + geom_line(alpha = 0.1)
c_d_10 <- c_d_10 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Treatment), alpha = 0.3) + stat_summary(fun = mean, aes(group = Treatment), size = 0.9, geom = "line", linetype = "solid")
c_d_10 <- c_d_10 + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = F)
c_d_10 <- c_d_10 + scale_color_manual(values = c("red","blue")) + theme(panel.border = element_rect(color = "black",
                                    fill = NA,
                                    size = 0.5))
c_d_10 <- c_d_10 + xlab("Days After Stress") + ylab("Cummulative Leaf Area") + ggtitle("Control v Drought (10% WHC) - Leaf Area Through Experiment") + theme(legend.position="bottom")
c_d_10

c_d_05 <- ggplot(data = cumm_data_clean2_05, aes(x = Days, y = Area.sum, color = Treatment, group = PotNo)) + theme_classic()
c_d_05 <- c_d_05 + geom_line(alpha = 0.1)
c_d_05 <- c_d_05 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Treatment), alpha = 0.3) 
c_d_05 <- c_d_05 + stat_summary(fun = mean, aes(group = Treatment), size = 0.9, geom = "line", linetype = "solid")
c_d_05 <- c_d_05 + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = F)
c_d_05 <- c_d_05 + scale_color_manual(values = c("red","blue")) + theme(panel.border = element_rect(color = "black",
                                    fill = NA,
                                    size = 0.5))
c_d_05 <- c_d_05 + xlab("Days After Stress") + ylab("Cummulative Leaf Area") + ggtitle("Control v Drought (5% WHC) - Leaf Area Through Experiment") 
c_d_05 <- c_d_05 + theme(legend.position="bottom")
c_d_05

##### Compare 5% WHC Treatment vs control and facet by genotype ####
cumm_data_clean2_05$Genotype <- factor(cumm_data_clean2_05$Genotype , levels = c("TDP362_1","TDP362_2", "TDP22"))

c_d_g_05 <- ggplot(data = cumm_data_clean2_05, aes(x = Days, y = Area.sum, group = PotNo, color = Treatment)) + theme_classic()
c_d_g_05  <- c_d_g_05 + geom_line(alpha = 0.4)
c_d_g_05 <- c_d_g_05 + facet_wrap("Genotype")
c_d_g_05 <- c_d_g_05 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Treatment), alpha = 0.3) + stat_summary(fun = mean, aes(group = Treatment), size = 0.7, geom = "line", linetype = "solid")
c_d_g_05  <- c_d_g_05 + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = T)
c_d_g_05  <- c_d_g_05 + scale_color_manual(values = c("red","green")) + theme(panel.border = element_rect(color = "black",fill = NA,size = 0.5))
c_d_g_05  <- c_d_g_05 + xlab("Days After Stress") + ylab("Average leaf Area") + ggtitle("Control V 5% Drought - Leaf Area") + theme(legend.position="bottom")
c_d_g_05 

##### Compare 10% WHC Treatment vs control and facet by genotype ####
cumm_data_clean2_10$Genotype <- factor(cumm_data_clean2_10$Genotype , levels = c("TDP362_1","TDP362_2", "TDP22"))

c_d_g_10 <- ggplot(data = cumm_data_clean2_10, aes(x = Days, y = Area.sum, group = PotNo, color = Treatment)) + theme_classic()
c_d_g_10  <- c_d_g_10 + geom_line(alpha = 0.1)
c_d_g_10 <- c_d_g_10 + facet_wrap("Genotype")
c_d_g_10 <- c_d_g_10 + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Treatment), alpha = 0.3) + stat_summary(fun = mean, aes(group = Treatment), size = 0.7, geom = "line", linetype = "solid")
c_d_g_10  <- c_d_g_10 + stat_compare_means(aes(group = Treatment), label = "p.signif", method = "t.test", hide.ns = T)
c_d_g_10  <- c_d_g_10 + scale_color_manual(values = c("green","blue")) + theme(panel.border = element_rect(color = "black",
                                    fill = NA,
                                    size = 0.5))
c_d_g_10  <- c_d_g_10 + xlab("Days After Stress") + ylab("Average leaf Area") + ggtitle("Control V 10% Drought - Leaf Area") + theme(legend.position="bottom")
c_d_g_10

##### Growth rate Calculation ####
growth <- plot(cumm_data_clean$Area.sum ~ as.factor(cumm_data_clean$Days))

####growth <- xlab("Days after Stress") + ylab("Average leaf Area") 
##### Linear Model ####
model <- lm(cumm_data_clean$Area.sum ~ cumm_data_clean$Days)
summary(model)
## 
## Call:
## lm(formula = cumm_data_clean$Area.sum ~ cumm_data_clean$Days)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -316811  -96142  -32152   54890 1103025 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             69385      16073   4.317 2.03e-05 ***
## cumm_data_clean$Days    26373       2229  11.832  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 173300 on 376 degrees of freedom
## Multiple R-squared:  0.2713, Adjusted R-squared:  0.2694 
## F-statistic:   140 on 1 and 376 DF,  p-value: < 2.2e-16
summary(model)$coefficients[2]
## [1] 26372.98
#### Mapping pot, genotype and condition with growth_rate ####
map <- read_xlsx("\\Users\\Aparna\\Desktop\\April_RPi_Analysis\\map_April2022.xlsx")
head(map)
map$GrowthRate <- 0
map$GrowthRate[1] <- summary(model)$coefficients[2]
map
##### Looping ####
for (i in 1:nrow(map)){
  temp <- subset(cumm_data_clean, cumm_data_clean$PotNo == i)
  #print(temp)
  model <- lm(temp$Area.sum ~ temp$Date)
  map$GrowthRate[i] <- summary(model)$coefficients[2]
}
map
##### Write to excel ####
write.csv(map,"GrowthRate_RPi_TB_March2022.csv", row.names = F, quote = F)
#Check growth rate of another pot outside of the loop
 temp <- subset(cumm_data_clean, cumm_data_clean$PotNo == 2)
 model <- lm(temp$Area.sum ~ temp$Date)
 summary(model)$coefficients[2]
## [1] 28614
#### Value matches ####
##### Scatterplot_growth rates and Compare Genotype and facet by Treatment ####
map$Genotype <- factor(map$Genotype,levels = c("TDP359", "TDP22_1", "TDP22_2"))
map$Treatment <- factor(map$Treatment,levels = c("Control", "10% WHC", "5% WHC"))
gr_scatter <- ggerrorplot(map, y = "GrowthRate", x = "Genotype", fill = "Genotype", color = "Genotype", facet.by = c("Treatment"), ncol = 4, desc_stat = "mean_sd", add = "jitter",add.params = list(color = "darkgrey"), xlab = "Genotype", ylab = "Growth Rate")
gr_scatter <- gr_scatter + rremove("legend") + stat_compare_means(method = "t.test", ref.group = ".all.", label = "p.signif", hide.ns = TRUE)
gr_scatter <- gr_scatter + theme(axis.text.x = element_text(angle = 90))
gr_scatter
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

##### Scatterplot_growth rates and Compare Genotype and facet by Genotype ####
gr_scatter <- ggerrorplot(map, y = "GrowthRate", x = "Treatment", fill = "Treatment", color = "Treatment", facet.by = c("Genotype"), ncol = 4, desc_stat = "mean_sd", add = "jitter",add.params = list(color = "darkgray"), xlab = "Treatment", ylab = "Growth Rate")
gr_scatter <- gr_scatter + rremove("legend") + stat_compare_means(method = "t.test", ref.group = ".all.", label = "p.signif", hide.ns = TRUE)
gr_scatter <- gr_scatter + theme(axis.text.x = element_text(angle = 90))
gr_scatter

Calculating Relative growth rates

#install.packages("doBy")
library(doBy)
## Warning: package 'doBy' was built under R version 4.1.3
## 
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
## 
##     order_by
head(map)
map_summary <- summaryBy(GrowthRate ~ Treatment + Genotype, data= map)
map_summary
map_sum_C <- subset(map_summary, map_summary$Treatment == "Control")
colnames(map_sum_C)[3] <- "GrowthRate.C"
map_sum_C <- map_sum_C[,2:3]
map_sum_C
# Let's combine this with map file:
library(reshape2)
map2 <- merge(map, map_sum_C, by = "Genotype")
map2$GR.rel <- map2$GrowthRate / map2$GrowthRate.C
map2
map3 <- subset(map2, map2$Treatment != "Control")

##### Scatterplot_growth rates and Compare Genotype and facet by Genotype ####
gr.rel_scatter <- ggerrorplot(map3, y = "GR.rel", x = "Genotype", fill = "Genotype", color = "Genotype", facet.by = c("Treatment"), ncol = 4, desc_stat = "mean_sd", add = "jitter",add.params = list(color = "darkgray"), xlab = "", ylab = "Growth Rate")
gr.rel_scatter <- gr.rel_scatter + rremove("legend") + stat_compare_means(method = "t.test", ref.group = "Sycamore", label = "p.signif", hide.ns = FALSE)
gr.rel_scatter <- gr.rel_scatter + theme(axis.text.x = element_text(angle = 90))
gr.rel_scatter
## Warning: Computation failed in `stat_compare_means()`:
## missing value where TRUE/FALSE needed
## Warning: Computation failed in `stat_compare_means()`:
## missing value where TRUE/FALSE needed

##### Final Fresh weight comparison ####
fw <- read_excel("\\Users\\Aparna\\Desktop\\April_RPi_Analysis\\Fw_dw_April2022.xlsx")
fw_mean <- fw %>% group_by(Genotype, Treatment) %>% mutate(fw_avg = mean(FW))
fw_mean_unq <- unique(fw_mean[,c("Genotype","Treatment","fw_avg")])
fw_mean_unq
rpi_avg_12D <- rpi_avg %>% filter(Days == 12)
meta <- cbind(fw_mean_unq, rpi_avg_12D)
## New names:
## * Genotype -> Genotype...1
## * Treatment -> Treatment...2
## * Genotype -> Genotype...5
## * Treatment -> Treatment...6
meta
##### check correlation in between total leaf area and FW ####
ggscatter(meta, x = "total.area.mean", y = "fw_avg", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Total leaf area", ylab = "Final weight (g)") + theme(panel.border = element_rect(color = "black",
                                    fill = NA,
                                    size = 0.5))
## `geom_smooth()` using formula 'y ~ x'