This notebook describes the data analysis pipeline for the side-view images collected for the 1st cowpea screen (Cowpea 01), performed between February 11th and March 15th, 2022 by Hayley Sussman. The data was analyzed using the PlantCV pipeline by Magda Julkowska, and its output is used here as the input.

Collating all side-views into one value for each plant

Since all of the plants were imaged from 7 angles, we first need to summarize the total plant size, by summarizing the pixels assigned as “plant” from all different side views. We will do it based on the time-stamp, for all of the imaging campaigns.

Let’s establish the pipeline first for the first file:

# Collect the trait tables in the working directory (presumably one per imaging
# session). `pattern` is a regular expression, so the dot is escaped and the
# match is anchored to the end of the file name; otherwise names such as
# "traits_csv_old.txt" would also be picked up.
my_files <- list.files(pattern = "traits\\.csv$")
length(my_files)

## [1] 7

# Load the first trait table.
data <- read.csv(file = my_files[[1]])
# Positional column subset; it has to contain `roi` and the shape traits
# (area, convex_hull_area, solidity, perimeter, width, height) used further on.
my_data <- data[, c(31, 18:25, 27:30)]
my_data

Let’s split the ROI into specific information - including the timestamp!

# The timestamp is the first "_"-separated field of the ROI label.
# Done in one vectorised pass: a `1:nrow()` loop would iterate over c(1, 0)
# and fail on an empty table.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
my_data

#install.packages("doBy")
library(doBy)
# Collapse all rows that share a timestamp into a single row holding the mean,
# sum and max of every shape trait (columns are named "<trait>.<stat>").
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
sum_data

Let’s keep only the information we find valuable:

# List the summary columns so the ones worth keeping can be picked.
names(sum_data)

##  [1] "timestamp"             "area.mean"             "area.sum"             
##  [4] "area.max"              "convex_hull_area.mean" "convex_hull_area.sum" 
##  [7] "convex_hull_area.max"  "solidity.mean"         "solidity.sum"         
## [10] "solidity.max"          "perimeter.mean"        "perimeter.sum"        
## [13] "perimeter.max"         "width.mean"            "width.sum"            
## [16] "width.max"             "height.mean"           "height.sum"           
## [19] "height.max"

# Keep the timestamp plus one statistic per trait (columns 1, 3, 5, 8, 11, 16
# and 19 of the summary table), selected by name.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
sum_clean

Great - before moving forward - let’s add also info on DAY:

# Timestamps have the form "<day>-<time>": split every timestamp once and take
# both halves. Vectorised, so an empty table no longer trips a `1:nrow()` loop
# and each timestamp is split only once.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

For this experiment - I see in the lab notebook that

“Pot 18 (524-B, drought) did not germinate

Pot 92 and 97 (TVu-14346, control and drought) did not germinate

Pot 113 (TVu-16237, control) did not germinate; pot 118 (TVu-16237, drought) germinated late

Pot 137 (TVu-6643, drought) germinated on March 12

Pot 2 (CB5-2, control) on before watering on March 15th is missing

Look at images of: 123+128, 124+129. They all have bumps on the leaves”

So now we need to make a list of the pot numbers from 1 to 140, remove the missing pots from it, and add the remaining pot numbers to this data:

# Pot numbers that are excluded for this imaging session.
nots <- c(18, 92, 97, 113, 137)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
##  [37]  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
##  [55]  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
##  [73]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [91]  93  94  95  96  98  99 100 101 102 103 104 105 106 107 108 109 110 111
## [109] 112 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
## [127] 131 132 133 134 135 136 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Start the combined table with the first imaging session.
CP01 <- sum_clean

OK - now let’s do next day but much quicker:

# Trait file 2: collapse the per-image rows into one row per timestamp.
data <- read.csv(my_files[2])
# Positional column subset; it has to contain `roi` and the six shape traits
# summarised below.
my_data <- data[, c(31, 18:25, 27:30)]
# The timestamp is the first "_"-separated field of the ROI label. Vectorised,
# so an empty table no longer breaks a `1:nrow()` loop.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
# Mean, sum and max of every trait per timestamp ("<trait>.<stat>" columns).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
# Keep one statistic per trait, selected by name rather than by position.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
# Timestamps have the form "<day>-<time>": split once and take both halves.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

# Pot numbers that are excluded for this imaging session.
nots <- c(18, 92, 97, 113, 137)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
##  [37]  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
##  [55]  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
##  [73]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [91]  93  94  95  96  98  99 100 101 102 103 104 105 106 107 108 109 110 111
## [109] 112 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
## [127] 131 132 133 134 135 136 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Append this session to the combined table.
CP01 <- rbind(CP01, sum_clean)

# Trait file 3: collapse the per-image rows into one row per timestamp.
data <- read.csv(my_files[3])
# Positional column subset; it has to contain `roi` and the six shape traits
# summarised below.
my_data <- data[, c(31, 18:25, 27:30)]
# The timestamp is the first "_"-separated field of the ROI label. Vectorised,
# so an empty table no longer breaks a `1:nrow()` loop.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
# Mean, sum and max of every trait per timestamp ("<trait>.<stat>" columns).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
# Keep one statistic per trait, selected by name rather than by position.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
# Timestamps have the form "<day>-<time>": split once and take both halves.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

Pot 126 was imaged twice in this session, so we remove the extra acquisition (the one taken at 15.33.45):

# Remove the row recorded at 15.33.45; `which()` also drops rows whose time is
# NA, exactly as `subset()` does.
sum_clean <- sum_clean[which(sum_clean$time != "15.33.45"), ]

# Pot numbers that are excluded for this imaging session.
nots <- c(18, 92, 97, 113, 137)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
##  [37]  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
##  [55]  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
##  [73]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [91]  93  94  95  96  98  99 100 101 102 103 104 105 106 107 108 109 110 111
## [109] 112 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
## [127] 131 132 133 134 135 136 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Append this session to the combined table.
CP01 <- rbind(CP01, sum_clean)

# Trait file 4: collapse the per-image rows into one row per timestamp.
data <- read.csv(my_files[4])
# Positional column subset; it has to contain `roi` and the six shape traits
# summarised below.
my_data <- data[, c(31, 18:25, 27:30)]
# The timestamp is the first "_"-separated field of the ROI label. Vectorised,
# so an empty table no longer breaks a `1:nrow()` loop.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
# Mean, sum and max of every trait per timestamp ("<trait>.<stat>" columns).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
# Keep one statistic per trait, selected by name rather than by position.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
# Timestamps have the form "<day>-<time>": split once and take both halves.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

In this imaging session, pots 103 and 119 are additionally missing:

# Pot numbers that are excluded for this imaging session (103 and 119 are
# excluded here in addition to the usual ones).
nots <- c(18, 92, 97, 103, 113, 119, 137)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
##  [37]  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
##  [55]  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
##  [73]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [91]  93  94  95  96  98  99 100 101 102 104 105 106 107 108 109 110 111 112
## [109] 114 115 116 117 118 120 121 122 123 124 125 126 127 128 129 130 131 132
## [127] 133 134 135 136 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Append this session to the combined table.
CP01 <- rbind(CP01, sum_clean)

# Trait file 5: collapse the per-image rows into one row per timestamp.
data <- read.csv(my_files[5])
# Positional column subset; it has to contain `roi` and the six shape traits
# summarised below.
my_data <- data[, c(31, 18:25, 27:30)]
# The timestamp is the first "_"-separated field of the ROI label. Vectorised,
# so an empty table no longer breaks a `1:nrow()` loop.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
# Mean, sum and max of every trait per timestamp ("<trait>.<stat>" columns).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
# Keep one statistic per trait, selected by name rather than by position.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
# Timestamps have the form "<day>-<time>": split once and take both halves.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

# Pot numbers that are excluded for this imaging session (27, 57 and 78 are
# excluded here in addition to the usual ones).
nots <- c(18, 27, 57, 78, 92, 97, 113, 137)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  28  29  30  31  32  33  34  35  36  37  38
##  [37]  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56
##  [55]  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75
##  [73]  76  77  79  80  81  82  83  84  85  86  87  88  89  90  91  93  94  95
##  [91]  96  98  99 100 101 102 103 104 105 106 107 108 109 110 111 112 114 115
## [109] 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
## [127] 134 135 136 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Append this session to the combined table.
CP01 <- rbind(CP01, sum_clean)

# Trait file 6: collapse the per-image rows into one row per timestamp.
data <- read.csv(my_files[6])
# Positional column subset; it has to contain `roi` and the six shape traits
# summarised below.
my_data <- data[, c(31, 18:25, 27:30)]
# The timestamp is the first "_"-separated field of the ROI label. Vectorised,
# so an empty table no longer breaks a `1:nrow()` loop.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
# Mean, sum and max of every trait per timestamp ("<trait>.<stat>" columns).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
# Keep one statistic per trait, selected by name rather than by position.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
# Timestamps have the form "<day>-<time>": split once and take both halves.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

# Pot numbers that are excluded for this imaging session (pot 137 is no longer
# on the list).
nots <- c(18, 92, 97, 113)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
##  [37]  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
##  [55]  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
##  [73]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [91]  93  94  95  96  98  99 100 101 102 103 104 105 106 107 108 109 110 111
## [109] 112 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
## [127] 131 132 133 134 135 136 137 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Append this session to the combined table.
CP01 <- rbind(CP01, sum_clean)

Started to image pot 137 from this timepoint!

# Trait file 7: collapse the per-image rows into one row per timestamp.
data <- read.csv(my_files[7])
# Positional column subset; it has to contain `roi` and the six shape traits
# summarised below.
my_data <- data[, c(31, 18:25, 27:30)]
# The timestamp is the first "_"-separated field of the ROI label. Vectorised,
# so an empty table no longer breaks a `1:nrow()` loop.
my_data$timestamp <- vapply(
  strsplit(my_data$roi, "_"),
  function(parts) parts[1],
  character(1)
)
# Mean, sum and max of every trait per timestamp ("<trait>.<stat>" columns).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(x) c(mean = mean(x), sum = sum(x), max = max(x))
)
# Keep one statistic per trait, selected by name rather than by position.
sum_clean <- sum_data[, c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
# Timestamps have the form "<day>-<time>": split once and take both halves.
stamp_parts <- strsplit(sum_clean$timestamp, "-")
sum_clean$day <- vapply(stamp_parts, function(parts) parts[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(parts) parts[2], character(1))
# Reorder to: timestamp, day, time, then the six trait columns.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean

# Pot numbers that are excluded for this imaging session.
nots <- c(18, 92, 97, 113)
# All pot numbers from 1 to 140 except the excluded ones, in ascending order.
pots <- setdiff(1:140, nots)
pots

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  19
##  [19]  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37
##  [37]  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55
##  [55]  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73
##  [73]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [91]  93  94  95  96  98  99 100 101 102 103 104 105 106 107 108 109 110 111
## [109] 112 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
## [127] 131 132 133 134 135 136 137 138 139 140

# Pot numbers are paired with the summary rows purely by position, so fail
# loudly when the counts disagree instead of risking silent recycling.
# NOTE(review): this pairing assumes the rows are in ascending pot order - confirm.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean

# Append this session to the combined table.
CP01 <- rbind(CP01, sum_clean)

Decoding the pot information using the FW data

We will load the data containing fresh and dry weight, as well as all of the decoding information:

# Fresh/dry weight table; it also carries the pot decoding information.
decode <- read.csv("FWDW_exp1.csv")
colnames(decode)[1] <- "pot.no"
# Keep the complete table for the fresh-weight comparison at the end.
FW_data <- decode
# The first three columns hold the decoding information.
decode <- decode[, 1:3]
# Pots to drop from the decoding table. Stated explicitly (same values as in
# the last imaging session) so this step no longer depends on whatever `nots`
# happened to hold after the per-file chunks.
nots <- c(18, 92, 97, 113)
decode <- decode[!(decode$pot.no %in% nots), ]

# Move the pot number next to the timestamp columns (same order as the
# positional selection c(1:3, 10, 4:9)).
CP01 <- CP01[, c(
  "timestamp", "day", "time", "pot.no",
  "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)]
CP01

# Attach the decoding columns to every measurement. `pot.no` is the only
# column the two tables share, so it is named as the key explicitly.
# all = TRUE keeps unmatched rows from either table; those show up with NA in
# the columns of the other table.
CP01_decoded <- merge(x = CP01, y = decode, by = "pot.no", all = TRUE)
unique(CP01_decoded$day)

## [1] "2022.03.03" "2022.03.05" "2022.03.10" "2022.03.08" "2022.03.14"
## [6] "2022.03.17" "2022.03.12" "2022.03.04" NA

# Show the merged table (measurements plus decoding columns).
CP01_decoded

Visualize the data over time:

For this, we need to convert the date into a numeric day of the experiment:

# Convert the imaging date ("YYYY.MM.DD") into days since 2022.03.03 (day 0).
# Date arithmetic replaces the hard-coded lookup: every date is handled, not
# only the eight listed ones, and the dots are no longer regex wildcards.
experiment_start <- as.Date("2022.03.03", format = "%Y.%m.%d")
day_chr <- as.character(CP01_decoded$day)
is_date <- grepl("^[0-9]{4}\\.[0-9]{2}\\.[0-9]{2}$", day_chr)
day_num <- rep(NA_real_, length(day_chr))
day_num[is_date] <- as.numeric(
  as.Date(day_chr[is_date], format = "%Y.%m.%d") - experiment_start
)
# Values that are not dates (e.g. already-converted day numbers when the chunk
# is re-run) go through as.numeric(); missing days stay NA.
day_num[!is_date] <- as.numeric(day_chr[!is_date])
CP01_decoded$day <- day_num
CP01_decoded

Now - let’s visualize the area.sum over time for each pot, and divide colours by treatments:

# Check which treatment labels occur (and how they are spelled).
unique(CP01_decoded[["Treatment"]])

## [1] "Control " "Drought"  "Control"

# Strip the stray spaces so "Control " and "Control" become one label, then
# fix the level order to Control, Drought.
CP01_decoded$Treatment <- factor(
  gsub(" ", "", CP01_decoded$Treatment, fixed = TRUE),
  levels = c("Control", "Drought")
)

library(ggplot2)
library(ggpubr)
library("ggsci")

# Treat the day as a discrete variable for plotting.
CP01_decoded$day <- as.factor(CP01_decoded$day)

# Shoot size over time: one faint line per pot, plus the per-treatment mean
# (dashed line) with a mean +/- SE ribbon and a per-day t-test between the
# treatments. `hide.ns` is spelled out as TRUE because `T` can be reassigned.
Area_lgraph_CP01 <- ggplot(
  data = CP01_decoded,
  aes(x = day, y = area.sum, group = pot.no, color = Treatment)
) +
  geom_line(alpha = 0.1) +
  stat_summary(
    fun.data = mean_se, geom = "ribbon", linetype = 0,
    aes(group = Treatment), alpha = 0.3
  ) +
  stat_summary(
    fun = mean, aes(group = Treatment), size = 0.7,
    geom = "line", linetype = "dashed"
  ) +
  stat_compare_means(
    aes(group = Treatment), label = "p.signif",
    method = "t.test", hide.ns = TRUE
  ) +
  ylab("Shoot Size (7 x SV pixels)") +
  xlab("Days After Stress") +
  scale_color_jco()
Area_lgraph_CP01

## Warning: Removed 2 rows containing non-finite values (stat_summary).
## Removed 2 rows containing non-finite values (stat_summary).

## Warning: Removed 2 rows containing non-finite values (stat_compare_means).

## Warning: Removed 2 row(s) containing missing values (geom_path).

Let’s save the data too into a clean and separate csv file:

# Export the decoded table without the row-name column.
write.csv(
  x = CP01_decoded,
  file = "Cowpea_01_Clean_data.csv",
  row.names = FALSE
)

Visualize the correlation between FW and pixels at final day:

# Keep only the measurements from day 14; `which()` drops rows whose day is
# NA, exactly as `subset()` does.
last_day <- CP01_decoded[which(CP01_decoded$day == 14), ]
unique(last_day[["Treatment"]])

## [1] Control Drought
## Levels: Control Drought

# Check the treatment labels in the weight table before merging on them.
unique(FW_data[["Treatment"]])

## [1] "Control " "Drought"  "Control"

# Remove the stray spaces so the treatment labels match those in `last_day`.
FW_data$Treatment <- gsub(" ", "", FW_data$Treatment, fixed = TRUE)
# Combine the last-day image traits with the weight table, keeping unmatched
# rows from both sides.
last_day_FW <- merge(
  x = last_day,
  y = FW_data,
  by = c("pot.no", "Genotype", "Treatment"),
  all = TRUE
)
# Entries of FW that cannot be parsed as numbers become NA (with a warning).
last_day_FW$FW <- as.numeric(as.character(last_day_FW$FW))

## Warning: NAs introduced by coercion

# Scatter plot of fresh weight against summed side-view area, with marginal
# rugs and the correlation statistic printed on the panel.
FW_Area_CP01 <- ggscatter(
  data = last_day_FW,
  x = "area.sum",
  y = "FW",
  rug = TRUE
) +
  stat_cor()
FW_Area_CP01

## Warning: Removed 6 rows containing non-finite values (stat_cor).

## Warning: Removed 6 rows containing missing values (geom_point).

Cowpea Screen 01

Collating all side-views into one value for each plant

Decoding the pot information using the FW data

Visualize the data over time:

Visualize the correlation between FW and pixels at final day: