R Notebook

This markdown was created on 2022/01/06 to examine and fix the side view image analysis for the Fall 2021 Cowpea experiment. When Leon initially made the graphs, many pots’ leaf area fluctuated, so we had to scan through the images to find irregularities. What was found: remove the following time stamps (details in LabArchives): 2021.10.22-13-15.22.11 (empty images) 2021.10.13-15.46.26 (hand in image) 2021.10.20-14.33.44 (repeat of pot #51) 2021.10.22-13.40.34 (empty images) Two pots were skipped on two different days, so I want to make sure the timestamps are correctly aligned: pot 99 should be 2021.10.20-12.00.36 pot 27 should be 2021.10.22-14.07.45

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

###Load necessary libraries

library(ggplot2)
library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(ggpubr)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

###Load and Organize Data

In Excel, I removed the necessary time stamps and added a column called “Pot.correct” to ensure all pot numbers are correctly aligned

setwd("/Users/julkowskalab/")
#getwd()
#list.files(pattern="clean")
#Load all CSVs from the 7 days of imaging 
d11 <- read.csv("2021.10.11.clean.csv")
d13 <- read.csv("2021.10.13.clean_2.csv")
d15 <- read.csv("2021.10.15.clean.csv")
d18 <- read.csv("2021.10.18.clean.csv")
d20 <- read.csv("2021.10.20.clean_2.csv")
d22 <- read.csv("2021.10.22.clean_2.csv")
d25 <- read.csv("2021.10.25.clean.csv")
pot_geno <- read.csv("cowpea_pot_geno_HS_new.csv")
Weight <- read.csv("final_weight_metatable.csv")
Map <- Weight[,c(1:3)]
Map

#merge all files with rbind
all_days <- rbind(d11, d13, d15, d18, d20, d22, d25)
all_days

#merge with pot number/geno file
# combined_days_geno <- merge(all_days, pot_geno, all = TRUE)
# combined_days_geno

# split column X, which contains the remainder of the timestamp that wasn't split off in excel
all_days <- all_days %>% separate(X, c( "sec","Raspi","side","camera"))
all_days

#order by "year"-"month"-"date"-"hour"-"min"-"sec"-"side"
all_days <- all_days[
  order(all_days[,"year"], all_days[,"month"], all_days[,"date"], all_days[,"hour"], all_days[,"min"], all_days[,"sec"], all_days[,"side"]),]
all_days

#Build a consensus index 
all_days$index <- paste0(all_days$year, ".", all_days$month,".", all_days$date, "-", all_days$hour, ".", all_days$min, ".", all_days$sec)
Raspi_clean <- all_days[,c("area","year", "month", "date","hour","min", "sec","index")]
Raspi_clean

write.csv(Raspi_clean, "Raspi_clean.csv", row.names = F)

###Group images based on consensus time stamp

### Calculate the total area/mean area for each time stamp
Raspi_clean <- Raspi_clean %>% group_by(index) %>% mutate(side.counts = n())
Raspi_clean <- Raspi_clean %>% group_by(index) %>% mutate(area.mean = ave(area))
Raspi_clean <- Raspi_clean %>% group_by(index) %>% mutate(area.total = sum(area))
#nrow(Raspi_clean)
Raspi_unique <- unique(Raspi_clean[,c("month", "date", "index","side.counts","area.mean","area.total")])
Raspi_unique

write.csv(Raspi_unique, "raspi_clean_combined.csv", row.names = F)

#Merge map with image data

Date_list <- unique(na.omit(Raspi_unique$date))
dim(Map)

## [1] 105   3

Map

# day 13
Map_new <- subset(Map, Map$POT != 22)
dim(Map_new)

## [1] 104   3

#day 20
Map_new2 <- subset(Map, Map$POT != 98)
#day 22
Map_new3 <- subset(Map, Map$POT != 26)

# day 11
i=1
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- sum.df

# day 13
i=2
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map_new)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)
data_all

# day 15
i=3
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 18
i=4
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 20
i=5
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map_new2)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 22
i=6
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map_new3)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)

# day 25
i=7
sum.df <- Raspi_unique[Raspi_unique$date == Date_list[i],] 
sum.df <- cbind(sum.df, Map)
assign(paste0("Exp_Date_",Date_list[i],"_summary"), sum.df)
data_all <- rbind(data_all, sum.df)
data_all

Raspi_summary <- data_all
Raspi_summary

write.csv(Raspi_summary,"all_summary.csv", row.names = F, quote = F )

#Change date to days after stress to align with other analysis
Raspi_summary$date <- gsub("11", "-3", Raspi_summary$date)
Raspi_summary$date <- gsub("13", "-1", Raspi_summary$date)
Raspi_summary$date <- gsub("15", "1", Raspi_summary$date)
Raspi_summary$date <- gsub("18", "4", Raspi_summary$date)
Raspi_summary$date <- gsub("20", "7", Raspi_summary$date)
Raspi_summary$date <- gsub("22", "9", Raspi_summary$date)
Raspi_summary$date <- gsub("25", "12", Raspi_summary$date)
Raspi_summary$date <- factor(Raspi_summary$date, levels = c("-3", "-1", "1", "4", "7", "9", "12"))

#Edit the names of the genotypes that are incorrect
Raspi_summary$Genotype <- gsub("IT97IC", "IT97K-499", Raspi_summary$Genotype)
Raspi_summary$Genotype <- gsub("UCR7", "UCR779", Raspi_summary$Genotype)
# Below in the graph pot 45 goes down from d23 to d25 so here I checked its listed total area
#pot_45 <- subset(Raspi_summary, Raspi_summary$POT == 45)
# pot_45

###Begin Data Visualization Characterize the variations of genotypes across three conditions

# Calculate the average numbers of each replicates under the same date per genotype
temp <- Raspi_summary %>% group_by(Condition, date, Genotype) %>% mutate(total.area.mean = ave(area.total))
Raspi_summary_ave <- unique(temp[,c("month", "date","Genotype","Condition", "total.area.mean")])

# Plot different genotypes under three conditions
Area_by_date_condition_genotype <- ggplot(data = Raspi_summary_ave, aes(x= date, y=total.area.mean, color = Genotype)) + 
  geom_line(alpha = 0.3,size = 0.8, aes(group= Genotype)) + facet_wrap(~ Condition, ncol=3) +
  geom_point(alpha = 0.8,size = 0.4, aes(group= Genotype)) + facet_wrap(~ Condition, ncol=3) +
  xlab("Days After Stress") +  ylab("Average Leaf area across each rep per genotype") +
  theme_bw()
Area_by_date_condition_genotype

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

###plot average leaf area across all reps per genotype

graph_all <- ggplot(data = Raspi_summary, aes(x= date, y=area.total, color = Condition)) + 
  geom_line(alpha = 0.3,size = 0.8, aes(group= POT)) + facet_wrap(~ Condition, ncol=3) +
  xlab("Days After Stress") +  ylab("Average Leaf area across each rep per genotype") +
  theme_bw()

graph_all

## Warning: Removed 1 row(s) containing missing values (geom_path).

In the above graph we are still seeing some pots have a decrease in leaf area as the experiment progresses. We determined that this could be due to some of the plants outgrowing the imaging station, as well as some of the plants (particularly stressed ones) losing a leaf or two over time. For instance, pot 45 has a large decrease from day 22 to day 25. In the day 25 images (timestamp 2021.10.25-14.25.26) you can see that the plant lost a leaf.

###Compare control vs drought and control vs salt

control_v_drought <- subset(Raspi_summary, Raspi_summary$Condition != "Salt")

control_v_salt <- subset(Raspi_summary, Raspi_summary$Condition != "Drought")

control_v_drought_area <- ggplot(data = control_v_drought, aes(x = date, y = area.total, color = Condition, group = POT)) + theme_classic()
control_v_drought_area <- control_v_drought_area + geom_line(alpha = 0.1)
control_v_drought_area <- control_v_drought_area + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) + stat_summary(fun = mean, aes(group = Condition), size = 0.9, geom = "line", linetype = "solid")
control_v_drought_area <- control_v_drought_area + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = T)
control_v_drought_area <- control_v_drought_area + scale_color_manual(values = c("blue", "red"))
control_v_drought_area <- control_v_drought_area + xlab("Days After Stress") + ylab("Average Leaf Area") + ggtitle("Control v Drought - Leaf Area Through Experiment") + theme(legend.position="bottom")
control_v_drought_area

control_v_salt_area <- ggplot(data = control_v_salt, aes(x = date, y = area.total, color = Condition, group = POT)) + theme_classic()
control_v_salt_area <- control_v_salt_area + geom_line(alpha = 0.1)
control_v_salt_area <- control_v_salt_area + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) + stat_summary(fun = mean, aes(group = Condition), size = 0.9, geom = "line", linetype = "solid")
control_v_salt_area <- control_v_salt_area + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = T)
control_v_salt_area <- control_v_salt_area + scale_color_manual(values = c("blue", "red"))
control_v_salt_area <- control_v_salt_area + xlab("Days After Stress") + ylab("Average Leaf Area") + ggtitle("Control v Salt - Leaf Area Through Experiment") + theme(legend.position="bottom")
control_v_salt_area

## Warning: Removed 1 rows containing non-finite values (stat_summary).

## Warning: Removed 1 rows containing non-finite values (stat_summary).

## Warning: Removed 1 rows containing non-finite values (stat_compare_means).

## Warning: Removed 1 row(s) containing missing values (geom_path).

###Now compare conditions and facet by genotype

control_v_drought_by_geno <- ggplot(data = control_v_drought, aes(x = date, y = area.total, group = POT, color = Condition)) + theme_classic()
control_v_drought_by_geno <- control_v_drought_by_geno + geom_line(alpha = 0.1)
control_v_drought_by_geno <- control_v_drought_by_geno + facet_wrap("Genotype")
control_v_drought_by_geno <- control_v_drought_by_geno + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "solid")
control_v_drought_by_geno <- control_v_drought_by_geno + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = T)
control_v_drought_by_geno <- control_v_drought_by_geno + scale_color_manual(values = c("blue", "red"))
control_v_drought_by_geno <- control_v_drought_by_geno + xlab("Days After Stress") + ylab("Average leaf Area") + ggtitle("Control V Drought - Leaf Area Through Experiment by Genotype") + theme(legend.position="bottom")
control_v_drought_by_geno

control_v_salt_by_geno <- ggplot(data = control_v_salt, aes(x = date, y = area.total, group = POT, color = Condition)) + theme_classic()
control_v_salt_by_geno <- control_v_salt_by_geno + geom_line(alpha = 0.1)
control_v_salt_by_geno <- control_v_salt_by_geno + facet_wrap("Genotype")
control_v_salt_by_geno <- control_v_salt_by_geno + stat_summary(fun.data = mean_se, geom = "ribbon", linetype = 0, aes(group = Condition), alpha = 0.3) + stat_summary(fun = mean, aes(group = Condition), size = 0.7, geom = "line", linetype = "solid")
control_v_salt_by_geno <- control_v_salt_by_geno + stat_compare_means(aes(group = Condition), label = "p.signif", method = "t.test", hide.ns = T)
control_v_salt_by_geno <- control_v_salt_by_geno + scale_color_manual(values = c("blue", "red"))
control_v_salt_by_geno <- control_v_salt_by_geno + xlab("Days After Stress") + ylab("Average leaf Area") + ggtitle("Control V Salt - Leaf Area Through Experiment by Genotype") + theme(legend.position="bottom")
control_v_salt_by_geno

## Warning: Removed 1 rows containing non-finite values (stat_summary).

## Warning: Removed 1 rows containing non-finite values (stat_summary).

## Warning: Removed 1 rows containing non-finite values (stat_compare_means).

## Warning: Removed 1 row(s) containing missing values (geom_path).

###Save plots as PDFs

#Save plots as PDFs
# pdf("Average Leaf Area.pdf", width = 13, height = 10)
# plot(graph_all)
# dev.off()
# 
# pdf("Control V Drought - Leaf Area Through Experiment.pdf", width = 13, height = 10)
# plot(control_v_drought_area)
# dev.off()
# 
# pdf("Control V Salt - Leaf Area Through Experiment.pdf", width = 13, height = 10)
# plot(control_v_salt_area)
# dev.off()
# 
# pdf("Control V Drought - Leaf Area Through Experiment by Genotype.pdf", width = 13, height = 10)
# plot(control_v_drought_by_geno)
# dev.off()
# 
# pdf("Control V Salt - Leaf Area Through Experiment by Genotype.pdf", width = 13, height = 10)
# plot(control_v_salt_by_geno)
# dev.off()
# 
# pdf("Leaf Area Over Time - Compare Condition_2.pdf", width = 13, height = 10)
# plot(Area_by_date_condition_genotype)
# dev.off()

Growth rate calculations

We need to fit growth one plant at the time:

#Subset data to only include days that are after stress (positive)
Raspi_summary

DAS <- subset(Raspi_summary, Raspi_summary$date != -3)
After_stress_only <- subset(DAS, DAS$date != -1)
After_stress_only$date <- as.numeric(as.factor(After_stress_only$date))
After_stress_only

temp <- subset(After_stress_only, After_stress_only$POT == 1)
temp

plot(temp$area.total ~ temp$date)

model <- lm(temp$area.total ~ temp$date)
summary(model)

## 
## Call:
## lm(formula = temp$area.total ~ temp$date)
## 
## Residuals:
##       1       2       3       4       5 
##  -13485  -40538  145630 -115708   24100 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   580337     182489   3.180  0.05009 . 
## temp$date     308410      35120   8.782  0.00311 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111100 on 3 degrees of freedom
## Multiple R-squared:  0.9626, Adjusted R-squared:  0.9501 
## F-statistic: 77.12 on 1 and 3 DF,  p-value: 0.003111

summary(model)$coefficients[2]

## [1] 308410.5

So now - we need to assign this value to a POT # 01 - let’s repurpose Map for this:

Map$GrowthRate <- 0
Map$GrowthRate[1] <- summary(model)$coefficients[2]
Map

Cool - let’s loop it!

for (i in 1:nrow(Map)){
  temp <- subset(After_stress_only, After_stress_only$POT == i)
  model <- lm(temp$area.total ~ temp$date)
  Map$GrowthRate[i] <- summary(model)$coefficients[2]
}

Map$Genotype <- gsub("UCR7", "UCR779", Map$Genotype)
Map$Genotype <- gsub("IT97IC", "IT97K-499", Map$Genotype)
Map

#write.csv(Map,"Map_GrowthRate.csv", row.names = F, quote = F )

#Sanity check - check growth rate of another pot outside of the loop
# temp <- subset(After_stress_only, After_stress_only$POT == 2)
# model <- lm(temp$area.total ~ temp$date)
# summary(model)$coefficients[2]
#The value appearing here matches the growth rate found in the loop - All looks OK

#find mean of growth rate
GR_mean_condition <- aggregate(GrowthRate ~ Condition, data = Map, FUN = mean)

#Scatterplot of growth rates. Compare Genotype and facet by Condition
growth_rate_scatter <- ggerrorplot(Map, y = "GrowthRate", x = "Genotype", na.rm = TRUE, fill = "Genotype", color = "Genotype", 
                                   facet.by = c("Condition"), ncol = 4, 
                                   desc_stat = "mean_sd", add = "jitter",
                                   add.params = list(color = "darkgray"),
                                   xlab = "Genotype", ylab = "Growth Rate")
growth_rate_scatter <- growth_rate_scatter + geom_hline(data = GR_mean_condition, aes(yintercept = GrowthRate), linetype = 2)
growth_rate_scatter <- growth_rate_scatter + rremove("legend") + stat_compare_means(method = "t.test", ref.group = ".all.", label = "p.signif", hide.ns = TRUE)
growth_rate_scatter <- growth_rate_scatter + theme(axis.text.x = element_text(angle = 90))
growth_rate_scatter

# pdf("Scatter of Growth Rate by Condition and Genotype.pdf", width = 13, height = 10)
# plot(growth_rate_scatter)
# dev.off()

#Scatterplot of growth rates. Compare Genotype and facet by Condition
growth_rate_scatter <- ggerrorplot(Map, y = "GrowthRate", x = "Genotype", na.rm = TRUE, fill = "Genotype", color = "Genotype", 
                                   facet.by = c("Condition"), ncol = 4, 
                                   desc_stat = "mean_sd", add = "jitter",
                                   add.params = list(color = "darkgray"),
                                   xlab = "Genotype", ylab = "Growth Rate")
growth_rate_scatter <- growth_rate_scatter + rremove("legend") + stat_compare_means(method = "t.test", ref.group = "Suvita-2", label = "p.signif", hide.ns = TRUE)
growth_rate_scatter <- growth_rate_scatter + theme(axis.text.x = element_text(angle = 90))
growth_rate_scatter

# pdf("Scatter of Growth Rate by Condition and Genotype.pdf", width = 13, height = 10)
# plot(growth_rate_scatter)
# dev.off()

More Growth Rate Plots

# Here I am going to display different treatments and compare to control, facet by accession
Map_c_d <- subset(Map, Map$Condition != "Salt")
Map_c_s <- subset(Map, Map$Condition != "Drought")

GR_mean_c_d <- aggregate(GrowthRate ~ Genotype, data = Map_c_d, FUN = mean)
GR_mean_c_s <- aggregate(GrowthRate ~ Genotype, data = Map_c_s, FUN = mean)

gr_scatter_c_vs_d <- ggerrorplot(Map_c_d, y = "GrowthRate", x = "Condition", na.rm = TRUE, fill = "Condition", color = "Condition", 
                                   facet.by = c("Genotype"), ncol = 3, 
                                   desc_stat = "mean_sd", add = "jitter",
                                   add.params = list(color = "darkgray"),
                                   xlab = "Condition", ylab = "Growth Rate")
gr_scatter_c_vs_d <- gr_scatter_c_vs_d + rremove("legend") + stat_compare_means(method = "t.test", ref.group = "Control", label = "p.signif", hide.ns = TRUE)
gr_scatter_c_vs_d <- gr_scatter_c_vs_d + theme(axis.text.x = element_text(angle = 90))
gr_scatter_c_vs_d

gr_scatter_c_vs_s <- ggerrorplot(Map_c_s, y = "GrowthRate", x = "Condition", na.rm = TRUE, fill = "Condition", color = "Condition", 
                                   facet.by = c("Genotype"), ncol = 3, 
                                   desc_stat = "mean_sd", add = "jitter",
                                   add.params = list(color = "darkgray"),
                                   xlab = "Condition", ylab = "Growth Rate")
gr_scatter_c_vs_s <- gr_scatter_c_vs_s + rremove("legend") + stat_compare_means(method = "t.test", ref.group = "Control", label = "p.signif", hide.ns = TRUE)
gr_scatter_c_vs_s <- gr_scatter_c_vs_s + theme(axis.text.x = element_text(angle = 90))
gr_scatter_c_vs_s