This notebook describes the data analysis pipeline for side-view images collected for the last cowpea screen (Cowpea 06), performed between March 18th and April 21st, 2022 by Hayley Sussman. The data was analyzed using the PlantCV pipeline by Magda Julkowska, and its output is used here as the input.
Since all of the plants were imaged from 7 angles, we first need to calculate the total plant size by summing the pixels assigned as “plant” across all of the side views. We will group the images by their time-stamp, and do this for all of the imaging campaigns.
Let’s first establish the pipeline using the first file:
# Locate every PlantCV trait table in the working directory. `pattern` is a
# regular expression, so the dot is escaped and the match is anchored to the
# end of the file name; otherwise "." matches any character and names such as
# "traits.csv.bak" would also be picked up.
my_files <- list.files(pattern = "traits\\.csv$")
length(my_files)
## [1] 8
# Read the first imaging day and keep only the columns needed downstream.
# The selection is positional; the later steps rely on it containing `roi`,
# `area`, `convex_hull_area`, `solidity`, `perimeter`, `width` and `height`.
# NOTE(review): confirm the indices against names(data) if the PlantCV
# output layout ever changes.
data <- read.csv(my_files[1])
my_data <- data[, c(31, 18:25, 27:30)]
my_data
Let’s split the ROI into specific information - including the timestamp!
# The text in front of the first underscore of `roi` is used as the imaging
# timestamp. Splitting the whole column at once replaces the row-by-row loop,
# which errored on an empty table because 1:nrow() iterates over c(1, 0).
roi_parts <- strsplit(as.character(my_data$roi), "_", fixed = TRUE)
# p[1] is NA when a label is empty, matching what the loop produced.
my_data$timestamp <- vapply(roi_parts, function(p) p[1], character(1))
my_data
# install.packages("doBy")  # one-off, only if the package is not installed
library(doBy)
# Collapse all rows that share a timestamp into a single row, computing the
# mean, the sum and the maximum of every shape trait. The names given inside
# c() become the suffixes of the output columns (e.g. area.sum).
sum_data <- summaryBy(
  area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
  data = my_data,
  FUN = function(v) c(mean = mean(v), sum = sum(v), max = max(v))
)
sum_data
Let’s keep only the information we find valuable:
colnames(sum_data)
## [1] "timestamp" "area.mean" "area.sum"
## [4] "area.max" "convex_hull_area.mean" "convex_hull_area.sum"
## [7] "convex_hull_area.max" "solidity.mean" "solidity.sum"
## [10] "solidity.max" "perimeter.mean" "perimeter.sum"
## [13] "perimeter.max" "width.mean" "width.sum"
## [16] "width.max" "height.mean" "height.sum"
## [19] "height.max"
# Keep one statistic per trait: summed area, mean convex hull area, mean
# solidity, mean perimeter, and maximum width and height. Selecting by name
# (these are columns 1, 3, 5, 8, 11, 16 and 19 listed above) fails loudly if
# the summary layout ever changes, instead of silently picking other columns.
keep_cols <- c(
  "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
  "perimeter.mean", "width.max", "height.max"
)
sum_clean <- sum_data[, keep_cols]
sum_clean
Great - before moving forward - let’s also add info on the DAY:
# The timestamp is split at "-": the first piece is stored as `day` and the
# second as `time`. Splitting the column once replaces the loop, which called
# strsplit() twice per row and broke on an empty table (1:nrow() gives c(1, 0)).
stamp_parts <- strsplit(as.character(sum_clean$timestamp), "-", fixed = TRUE)
sum_clean$day <- vapply(stamp_parts, function(p) p[1], character(1))
sum_clean$time <- vapply(stamp_parts, function(p) p[2], character(1))
# Move the two new columns (positions 8 and 9) directly behind the timestamp.
sum_clean <- sum_clean[, c(1, 8:9, 2:7)]
sum_clean
For this experiment - I see nothing special mentioned in the lab notebook. So we can assume we have 140 pots - all plants germinated and no plants were missed during imaging… but we will see :)
pots <- seq_len(140)
pots
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140
# Pot numbers are attached purely by row position, so the table must hold
# exactly one row per pot. Without this check a row count that is a multiple
# of 140 would be accepted and the pot numbers silently recycled.
# NOTE(review): this also presumes that the row order of sum_clean follows
# the pot order - confirm against the imaging log.
stopifnot(nrow(sum_clean) == length(pots))
sum_clean$pot.no <- pots
sum_clean
# Start the experiment-wide table with the first imaging day.
CP02 <- sum_clean
OK - now let’s do the next day, but much quicker:
# Collapse one PlantCV trait file into one row per imaging timestamp, using
# the same steps that were walked through above for the first file.
#
# path:   path to a PlantCV "traits.csv" file.
# n_pots: number of pots expected in the file (default 140). Pot numbers
#         1..n_pots are attached by row position.
#         NOTE(review): this presumes the row order returned by summaryBy()
#         follows the pot order - confirm against the imaging log.
#
# Returns a data frame with the columns timestamp, day, time, area.sum,
# convex_hull_area.mean, solidity.mean, perimeter.mean, width.max,
# height.max and pot.no. Stops with an error when the number of timestamps
# differs from n_pots.
summarise_side_views <- function(path, n_pots = 140) {
  traits <- read.csv(path)
  # Same positional selection as for the first file.
  traits <- traits[, c(31, 18:25, 27:30)]

  # The text in front of the first "_" of `roi` is the imaging timestamp.
  roi_parts <- strsplit(as.character(traits$roi), "_", fixed = TRUE)
  traits$timestamp <- vapply(roi_parts, function(p) p[1], character(1))

  per_stamp <- summaryBy(
    area + convex_hull_area + solidity + perimeter + width + height ~ timestamp,
    data = traits,
    FUN = function(v) c(mean = mean(v), sum = sum(v), max = max(v))
  )
  # One statistic per trait; these names are columns 1, 3, 5, 8, 11, 16 and 19
  # of the summaryBy() output.
  per_stamp <- per_stamp[, c(
    "timestamp", "area.sum", "convex_hull_area.mean", "solidity.mean",
    "perimeter.mean", "width.max", "height.max"
  )]

  # Timestamp pieces around "-": first is the day, second the time.
  stamp_parts <- strsplit(as.character(per_stamp$timestamp), "-", fixed = TRUE)
  per_stamp$day <- vapply(stamp_parts, function(p) p[1], character(1))
  per_stamp$time <- vapply(stamp_parts, function(p) p[2], character(1))
  per_stamp <- per_stamp[, c(1, 8:9, 2:7)]

  # Refuse to number the pots when the row count is off; a plain assignment
  # would silently recycle the pot numbers for any multiple of n_pots.
  stopifnot(nrow(per_stamp) == n_pots)
  per_stamp$pot.no <- seq_len(n_pots)
  per_stamp
}

# Process every remaining imaging day and append all of them to CP02 in a
# single rbind() rather than growing the table one file at a time.
remaining_days <- lapply(my_files[-1], summarise_side_views)
CP02 <- do.call(rbind, c(list(CP02), remaining_days))
We will load the data containing fresh and dry weight, as well as all of the decoding information:
decode <- read.csv("FWDW_exp2.csv")
# The first column identifies the pot; give it the name used in CP02 so the
# two tables can be joined on it.
colnames(decode)[1] <- "pot.no"
# Keep an untouched copy of the complete table for the weight comparison
# further down.
FW_data <- decode
FW_data
# Only the first three columns are needed for decoding the image data.
decode <- decode[, 1:3]
# Move pot.no (column 10) directly behind timestamp, day and time.
CP02 <- CP02[, c(1:3, 10, 4:9)]
CP02
# Name the join key explicitly instead of relying on whichever column names
# the two tables happen to share. all = TRUE keeps pots that are present in
# only one of the tables, which is where the NA day below comes from.
CP02_decoded <- merge(CP02, decode, by = "pot.no", all = TRUE)
unique(CP02_decoded$day)
## [1] "2022.04.07" "2022.04.17" "2022.04.09" "2022.04.13" "2022.04.11"
## [6] "2022.04.15" "2022.04.21" "2022.04.19" NA
CP02_decoded
For plotting, we need to convert the date into a numeric day of the experiment:
# Convert the "YYYY.MM.DD" strings into the number of days since the first
# imaging day (2022.04.07 becomes 0, 2022.04.09 becomes 2, ... 2022.04.21
# becomes 14). Date arithmetic replaces eight hard-coded gsub() calls whose
# patterns treated "." as a regex wildcard and which had to be extended by
# hand for every additional imaging day. Missing dates stay NA.
stress_start <- as.Date("2022-04-07")
imaging_date <- as.Date(as.character(CP02_decoded$day), format = "%Y.%m.%d")
# Subtracting two Date objects yields a difference measured in days.
CP02_decoded$day <- as.numeric(imaging_date - stress_start)
CP02_decoded
Now - let’s visualize the area.sum over time for each pot, and divide colours by treatments:
unique(CP02_decoded$Treatment)
## [1] "Control " "Drought" "Control"
# Remove the stray trailing blank from "Control ". trimws() only touches
# leading/trailing whitespace, whereas gsub(" ", "") would also delete spaces
# inside a label.
CP02_decoded$Treatment <- trimws(CP02_decoded$Treatment)
# Drop every row that has an NA in any column (e.g. pots without image data).
CP02_decoded <- na.omit(CP02_decoded)
CP02_decoded$Treatment <- factor(
  CP02_decoded$Treatment,
  levels = c("Control", "Drought")
)
library(ggplot2)
library(ggpubr)
library("ggsci")
# Treat the day as a discrete axis so that the per-day comparison between
# treatments is computed for each imaging day separately.
CP02_decoded$day <- as.factor(CP02_decoded$day)
Area_lgraph_CP02 <- ggplot(
  data = CP02_decoded,
  aes(x = day, y = area.sum, group = pot.no, color = Treatment)
) +
  # one faint line per pot
  geom_line(alpha = 0.1) +
  # mean +/- standard error ribbon per treatment
  stat_summary(
    fun.data = mean_se, geom = "ribbon", linetype = 0,
    aes(group = Treatment), alpha = 0.3
  ) +
  # dashed line for the treatment mean
  stat_summary(
    fun = mean, aes(group = Treatment), size = 0.7,
    geom = "line", linetype = "dashed"
  ) +
  # per-day t-test between treatments; TRUE is spelled out because T is an
  # ordinary variable that can be reassigned
  stat_compare_means(
    aes(group = Treatment), label = "p.signif",
    method = "t.test", hide.ns = TRUE
  ) +
  ylab("Shoot Size (7 x SV pixels)") +
  xlab("Days After Stress") +
  scale_color_jco()
Area_lgraph_CP02
Let’s also save the data into a clean, separate csv file:
write.csv(CP02_decoded, "Cowpea_02_Clean_data.csv", row.names = FALSE)
# Keep only the final imaging day. `day` is a factor at this point, so the
# comparison is made on its label explicitly rather than against a number.
last_day <- subset(CP02_decoded, as.character(day) == "14")
unique(last_day$Treatment)
## [1] Control Drought
## Levels: Control Drought
unique(FW_data$Treatment)
## [1] "Control " "Drought" "Control"
# Same clean-up as for the image data: strip only the surrounding whitespace
# so the treatment labels of both tables match in the merge below.
FW_data$Treatment <- trimws(FW_data$Treatment)
# all = TRUE keeps pots that appear in only one of the two tables; these show
# up as NA and are the rows dropped in the warnings below.
last_day_FW <- merge(
  last_day, FW_data,
  by = c("pot.no", "Genotype", "Treatment"),
  all = TRUE
)
last_day_FW
# Scatter plot of the summed side-view area against the FW column, with the
# correlation added by stat_cor().
FW_Area_CP02 <- ggscatter(
  last_day_FW,
  x = "area.sum", y = "FW", rug = TRUE
) + stat_cor()
FW_Area_CP02
## Warning: Removed 2 rows containing non-finite values (stat_cor).
## Warning: Removed 2 rows containing missing values (geom_point).