In this notebook I am going to examine the lateral root distribution across a collection of tomato accessions which were collected by Safarina Ahmad.
The aims are to: 1) examine the lateral root distribution across the main root 2) visualize the shift in apical zone (getting extended under salt stress) that Safarina observed in the pictures 3) calculate the “lateral root center of gravity” - indicating the portion of Main Root at which majority of Lateral Root number / Length standard calculation from center of gravity (physics) is: calculate momentum for each object (dist. * weight) center of gravity = sum(momentum) / sum(distance)
getwd()
## [1] "/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis"
setwd("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/")
list.files()
## [1] "20200131_Branching_analysis_for_Safi_files"
## [2] "20200131_Branching_analysis_for_Safi.nb.html"
## [3] "20200131_Branching_analysis_for_Safi.Rmd"
## [4] "AllMarks_DataInSegment"
## [5] "DataInSegment_04122019.R"
## [6] "G1_010_AllMarks.csv"
## [7] "G1_010_GlobalRootData.csv"
## [8] "G1_setup.csv"
## [9] "G2_010_AllMarks.csv"
## [10] "G2_010_GlobalRootData.csv"
## [11] "G2_setup.csv"
## [12] "G3_010_AllMarks.csv"
## [13] "G3_010_GlobalRootData.csv"
## [14] "G3_setup.csv"
## [15] "G4_010_AllMarks.csv"
## [16] "G4_010_GlobalRootData.csv"
## [17] "G4_setup.csv"
## [18] "G5_010_AllMarks.csv"
## [19] "G5_010_GlobalRootData.csv"
## [20] "G5_setup.csv"
## [21] "G6_010_AllMarks.csv"
## [22] "G6_010_GlobalRootData.csv"
## [23] "G6_setup.csv"
## [24] "G7_010_AllMarks.csv"
## [25] "G7_010_GlobalRootData.csv"
## [26] "G7_setup.csv"
## [27] "G8_010_AllMarks.csv"
## [28] "G8_010_GlobalRootData.csv"
## [29] "G8_setup.csv"
## [30] "GWAS_ready.csv"
## [31] "LR_distribution_final_set.csv"
## [32] "ultimate_GWAS_only_Safi.csv"
## [33] "ultimate_GWAS_Safi.csv"
libraries used in this script:
library(reshape)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:reshape':
##
## colsplit, melt, recast
library(ggplot2)
library(ggpubr)
## Loading required package: magrittr
library(doBy)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## The following object is masked from 'package:reshape':
##
## stamp
library(RColorBrewer)
library(corrplot)
## corrplot 0.84 loaded
the files that contain the position of each LR are “GlobalRootData.csv” - so we need to import them and then fuse them all into one big file
my_files <- list.files(pattern="GlobalRootData.csv")
my_files
## [1] "G1_010_GlobalRootData.csv" "G2_010_GlobalRootData.csv"
## [3] "G3_010_GlobalRootData.csv" "G4_010_GlobalRootData.csv"
## [5] "G5_010_GlobalRootData.csv" "G6_010_GlobalRootData.csv"
## [7] "G7_010_GlobalRootData.csv" "G8_010_GlobalRootData.csv"
length(my_files)
## [1] 8
Let’s do one file as an example - and add experiment number to the file - so we will be able to sort out the roots later:
temp <- read.csv(my_files[1])
head(temp)
exp <- gsub("_010_GlobalRootData.csv", "", my_files[1])
exp
## [1] "G1"
temp$experiment <- exp
final <- temp
Looks good - so let’s loop it and combine it into final file:
dim(final)
## [1] 2008 22
for(i in 2:8){
temp <- read.csv(my_files[i])
exp <- gsub("_010_GlobalRootData.csv", "", my_files[i])
temp$experiment <- exp
final <- rbind(final, temp)
}
dim(final)
## [1] 14736 22
we are only going to calculate the traits for roots that are actually having LR - therefore - we need to subset out final dataset for n_child > 0
head(final)
so in our data - we have a structure where
1) each root has an unique ID - and all the lateral roots belonging to one MR list that ID as parent 2) all the main roots have “Root” in the root_ontology collumn 3) insertion_position is a position on MR where individual LR is inserted 4) length of individual root is in collumn “length”
So maybe first let’s just clean up the table to the collumns we really need for this calculations - as we are only interested in length and identifiers listed above - we are trimming the data table off the other info:
colnames(final)
## [1] "image" "root_name" "root"
## [4] "length" "vector_length" "surface"
## [7] "volume" "direction" "diameter"
## [10] "root_order" "root_ontology" "parent_name"
## [13] "parent" "insertion_position" "insertion_angle"
## [16] "n_child" "child_density" "first_child"
## [19] "insertion_first_child" "last_child" "insertion_last_child"
## [22] "experiment"
final2 <- final[,c(22, 1, 3:4, 11, 16, 13:14)]
head(final2)
Then we create a file containing only the main root information:
unique(final2$root_ontology)[1]
## [1] Root
## Levels: Lateral root Root
only_MR <- subset(final2, final2$root_ontology == unique(final2$root_ontology)[1])
head(only_MR)
Plus - we would like to have the MR that only have LR
no_LR_MR <- subset(only_MR, only_MR$n_child < 1)
only_MR <- subset(only_MR, only_MR$n_child > 0)
head(only_MR)
Let’s cut all of the unneccessary info here too:
only_MR <- only_MR[,c(1:4)]
dim(only_MR)
## [1] 1254 4
all_MR <- only_MR$root
length(all_MR)
## [1] 1254
So we have in total 1254 unique Main Roots - we will have to examine the LR belonging to each by extracting them based on the “parent” collumn:
MR_now <- all_MR[1]
MR_now
## [1] e0ff9ea5-6a63-42ed-be68-d1f6994c47b3
## 14736 Levels: 001813c8-3651-4583-8fd8-8ad0811e0ed2 ...
head(final2)
super_temp <- subset(final2, final2$parent %in% MR_now)
super_temp
Then - we need to create some info about the length of individual zones - for this we need info on MR_length
MR_length <- subset(only_MR, only_MR$root %in% MR_now)
MR_length <- MR_length$length
MR_length
## [1] 11.44772
Length of Apical zone is basically equal to position of first lateral root Branched zone length is the position of last LR - position of first LR Basal zone is the MR length - position of last LR thus:
Apical <- min(super_temp$insertion_position)
Branched <- (max(super_temp$insertion_position) - Apical)
Basal <- MR_length - max(super_temp$insertion_position)
Apical
## [1] 1.61211
Branched
## [1] 5.770811
Basal
## [1] 4.064801
Then - we can calculate the % of LR across the MR length - by dividing the MR into 4 or 10 parts
super_temp$LR_in_10_perc <- 0
super_temp$LR_in_20_perc <- 0
super_temp$LR_in_30_perc <- 0
super_temp$LR_in_40_perc <- 0
super_temp$LR_in_50_perc <- 0
super_temp$LR_in_60_perc <- 0
super_temp$LR_in_70_perc <- 0
super_temp$LR_in_80_perc <- 0
super_temp$LR_in_90_perc <- 0
super_temp$LR_in_100_perc <- 0
super_temp$LR_in_25_perc_4 <- 0
super_temp$LR_in_50_perc_4 <- 0
super_temp$LR_in_75_perc_4 <- 0
super_temp$LR_in_100_perc_4 <- 0
for(e in 1:nrow(super_temp)){
if (super_temp$insertion_position[e] < (MR_length/10)){
super_temp$LR_in_10_perc[e] <- 1
} else {super_temp$LR_in_10_perc[e] <- 0}
if (super_temp$insertion_position[e] < 2*(MR_length/10) & super_temp$insertion_position[e] > (MR_length/10)){
super_temp$LR_in_20_perc[e] <- 1
} else {super_temp$LR_in_20_perc[e] <- 0}
if (super_temp$insertion_position[e] < 3*(MR_length/10) & super_temp$insertion_position[e] > 2*(MR_length/10)){
super_temp$LR_in_30_perc[e] <- 1
} else {super_temp$LR_in_30_perc[e] <- 0}
if (super_temp$insertion_position[e] < 4*(MR_length/10) & super_temp$insertion_position[e] > 3*(MR_length/10)){
super_temp$LR_in_40_perc[e] <- 1
} else {super_temp$LR_in_40_perc[e] <- 0}
if (super_temp$insertion_position[e] < 5*(MR_length/10) & super_temp$insertion_position[e] > 4*(MR_length/10)){
super_temp$LR_in_50_perc[e] <- 1
} else {super_temp$LR_in_50_perc[e] <- 0}
if (super_temp$insertion_position[e] < 6*(MR_length/10) & super_temp$insertion_position[e] > 5*(MR_length/10)){
super_temp$LR_in_60_perc[e] <- 1
} else {super_temp$LR_in_60_perc[e] <- 0}
if (super_temp$insertion_position[e] < 7*(MR_length/10) & super_temp$insertion_position[e] > 6*(MR_length/10)){
super_temp$LR_in_70_perc[e] <- 1
} else {super_temp$LR_in_70_perc[e] <- 0}
if (super_temp$insertion_position[e] < 8*(MR_length/10) & super_temp$insertion_position[e] > 7*(MR_length/10)){
super_temp$LR_in_80_perc[e] <- 1
} else {super_temp$LR_in_80_perc[e] <- 0}
if (super_temp$insertion_position[e] < 9*(MR_length/10) & super_temp$insertion_position[e] > 8*(MR_length/10)){
super_temp$LR_in_90_perc[e] <- 1
} else {super_temp$LR_in_90_perc[e] <- 0}
if (super_temp$insertion_position[e] < 10*(MR_length/10) & super_temp$insertion_position[e] > 9*(MR_length/10)){
super_temp$LR_in_100_perc[e] <- 1
} else {super_temp$LR_in_100_perc[e] <- 0}
if (super_temp$insertion_position[e] < 1*(MR_length/4)){
super_temp$LR_in_25_perc_4[e] <- 1
} else {super_temp$LR_in_25_perc_4[e] <- 0}
if (super_temp$insertion_position[e] < 2*(MR_length/4) & super_temp$insertion_position[e] > 1*(MR_length/4)){
super_temp$LR_in_50_perc_4[e] <- 1
} else {super_temp$LR_in_50_perc_4[e] <- 0}
if (super_temp$insertion_position[e] < 3*(MR_length/4) & super_temp$insertion_position[e] > 2*(MR_length/4)){
super_temp$LR_in_75_perc_4[e] <- 1
} else {super_temp$LR_in_75_perc_4[e] <- 0}
if (super_temp$insertion_position[e] < 4*(MR_length/4) & super_temp$insertion_position[e] > 3*(MR_length/4)){
super_temp$LR_in_100_perc_4[e] <- 1
} else {super_temp$LR_in_100_perc_4[e] <- 0}
}
super_temp
so after classifying each LR to its respective portion of MR, we can then summarize them and calculate total LR length in this fragment:
super_temp$LRL_in_10_perc <- super_temp$length * super_temp$LR_in_10_perc
super_temp$LRL_in_20_perc <- super_temp$length * super_temp$LR_in_20_perc
super_temp$LRL_in_30_perc <- super_temp$length * super_temp$LR_in_30_perc
super_temp$LRL_in_40_perc <- super_temp$length * super_temp$LR_in_40_perc
super_temp$LRL_in_50_perc <- super_temp$length * super_temp$LR_in_50_perc
super_temp$LRL_in_60_perc <- super_temp$length * super_temp$LR_in_60_perc
super_temp$LRL_in_70_perc <- super_temp$length * super_temp$LR_in_70_perc
super_temp$LRL_in_80_perc <- super_temp$length * super_temp$LR_in_80_perc
super_temp$LRL_in_90_perc <- super_temp$length * super_temp$LR_in_90_perc
super_temp$LRL_in_100_perc <- super_temp$length * super_temp$LR_in_100_perc
super_temp$LRL_in_25_perc_4 <- super_temp$length * super_temp$LR_in_25_perc_4
super_temp$LRL_in_50_perc_4 <- super_temp$length * super_temp$LR_in_50_perc_4
super_temp$LRL_in_75_perc_4 <- super_temp$length * super_temp$LR_in_75_perc_4
super_temp$LRL_in_100_perc_4 <- super_temp$length * super_temp$LR_in_100_perc_4
LR_no_10_100 <- sum(super_temp$LR_in_10_perc)
LR_no_20_100 <- sum(super_temp$LR_in_20_perc)
LR_no_30_100 <- sum(super_temp$LR_in_30_perc)
LR_no_40_100 <- sum(super_temp$LR_in_40_perc)
LR_no_50_100 <- sum(super_temp$LR_in_50_perc)
LR_no_60_100 <- sum(super_temp$LR_in_60_perc)
LR_no_70_100 <- sum(super_temp$LR_in_70_perc)
LR_no_80_100 <- sum(super_temp$LR_in_80_perc)
LR_no_90_100 <- sum(super_temp$LR_in_90_perc)
LR_no_100_100 <- sum(super_temp$LR_in_100_perc)
LR_no_25_4 <- sum(super_temp$LR_in_25_perc_4)
LR_no_50_4 <- sum(super_temp$LR_in_50_perc_4)
LR_no_75_4 <- sum(super_temp$LR_in_75_perc_4)
LR_no_100_4 <- sum(super_temp$LR_in_100_perc_4)
LRL_10_100 <- sum(super_temp$LRL_in_10_perc)
LRL_20_100 <- sum(super_temp$LRL_in_20_perc)
LRL_30_100 <- sum(super_temp$LRL_in_30_perc)
LRL_40_100 <- sum(super_temp$LRL_in_40_perc)
LRL_50_100 <- sum(super_temp$LRL_in_50_perc)
LRL_60_100 <- sum(super_temp$LRL_in_60_perc)
LRL_70_100 <- sum(super_temp$LRL_in_70_perc)
LRL_80_100 <- sum(super_temp$LRL_in_80_perc)
LRL_90_100 <- sum(super_temp$LRL_in_90_perc)
LRL_100_100 <- sum(super_temp$LRL_in_100_perc)
LRL_25_4 <- sum(super_temp$LRL_in_25_perc_4)
LRL_50_4 <- sum(super_temp$LRL_in_50_perc_4)
LRL_75_4 <- sum(super_temp$LRL_in_75_perc_4)
LRL_100_4 <- sum(super_temp$LRL_in_100_perc_4)
OK - last but not least - we should calculate the center of gravity for LR:
super_temp$momentum <- (super_temp$insertion_position * super_temp$length)
super_temp
all_momentum <- sum(super_temp$momentum)
all_momentum
## [1] 61.61884
all_length <- sum(super_temp$length)
all_length
## [1] 18.08004
CoG <- (all_momentum / all_length)
CoG
## [1] 3.408113
Finally - let’s add all these values into our MR_only table
First we need to create needed collumns:
only_MR$Apical <- 0
only_MR$Branched <- 0
only_MR$Basal <- 0
only_MR$Apical_perc <- 0
only_MR$Branched_perc <- 0
only_MR$Basal_perc <- 0
only_MR$LR_no_10_100 <- 0
only_MR$LR_no_20_100 <- 0
only_MR$LR_no_30_100 <- 0
only_MR$LR_no_40_100 <- 0
only_MR$LR_no_50_100 <- 0
only_MR$LR_no_60_100 <- 0
only_MR$LR_no_70_100 <- 0
only_MR$LR_no_80_100 <- 0
only_MR$LR_no_90_100 <- 0
only_MR$LR_no_100_100 <- 0
only_MR$LR_no_25_4 <- 0
only_MR$LR_no_50_4 <- 0
only_MR$LR_no_75_4 <- 0
only_MR$LR_no_100_4 <- 0
only_MR$LRL_10_100 <- 0
only_MR$LRL_20_100 <- 0
only_MR$LRL_30_100 <- 0
only_MR$LRL_40_100 <- 0
only_MR$LRL_50_100 <- 0
only_MR$LRL_60_100 <- 0
only_MR$LRL_70_100 <- 0
only_MR$LRL_80_100 <- 0
only_MR$LRL_90_100 <- 0
only_MR$LRL_100_100 <- 0
only_MR$LRL_25_4 <- 0
only_MR$LRL_50_4 <- 0
only_MR$LRL_75_4 <- 0
only_MR$LRL_100_4 <- 0
only_MR$CoG <- 0
then add values to these collumns:
only_MR$Apical[1] <- Apical
only_MR$Branched[1] <- Branched
only_MR$Basal[1] <- Basal
only_MR$Apical_perc[1] <- (Apical / MR_length)
only_MR$Branched_perc[1] <- (Branched / MR_length)
only_MR$Basal_perc[1] <- (Basal / MR_length)
only_MR$LR_no_10_100[1] <- LR_no_10_100
only_MR$LR_no_20_100[1] <- LR_no_20_100
only_MR$LR_no_30_100[1] <- LR_no_30_100
only_MR$LR_no_40_100[1] <- LR_no_40_100
only_MR$LR_no_50_100[1] <- LR_no_50_100
only_MR$LR_no_60_100[1] <- LR_no_60_100
only_MR$LR_no_70_100[1] <- LR_no_70_100
only_MR$LR_no_80_100[1] <- LR_no_80_100
only_MR$LR_no_90_100[1] <- LR_no_90_100
only_MR$LR_no_100_100[1] <- LR_no_100_100
only_MR$LR_no_25_4[1] <- LR_no_25_4
only_MR$LR_no_50_4[1] <- LR_no_50_4
only_MR$LR_no_75_4[1] <- LR_no_75_4
only_MR$LR_no_100_4[1] <- LR_no_100_4
only_MR$LRL_10_100[1] <- LRL_10_100
only_MR$LRL_20_100[1] <- LRL_20_100
only_MR$LRL_30_100[1] <- LRL_30_100
only_MR$LRL_40_100[1] <- LRL_40_100
only_MR$LRL_50_100[1] <- LRL_50_100
only_MR$LRL_60_100[1] <- LRL_60_100
only_MR$LRL_70_100[1] <- LRL_70_100
only_MR$LRL_80_100[1] <- LRL_80_100
only_MR$LRL_90_100[1] <- LRL_90_100
only_MR$LRL_100_100[1] <- LRL_100_100
only_MR$LRL_25_4[1] <- LRL_25_4
only_MR$LRL_50_4[1] <- LRL_50_4
only_MR$LRL_75_4[1] <- LRL_75_4
only_MR$LRL_100_4[1] <- LRL_100_4
only_MR$CoG[1] <- CoG
let’s have a look if everything was correctly inserted:
head(only_MR)
Loop everything for the remaining roots - I already forgot how many we have ;)
length(unique(only_MR$root))
## [1] 1254
Loopy loop:
for(i in 1:1254){
MR_now <- all_MR[i]
super_temp <- subset(final2, final2$parent %in% MR_now)
MR_length <- subset(only_MR, only_MR$root %in% MR_now)
MR_length <- MR_length$length
Apical <- min(super_temp$insertion_position)
Branched <- max(super_temp$insertion_position) - Apical
Basal <- MR_length - max(super_temp$insertion_position)
only_MR$Apical[i] <- Apical
only_MR$Branched[i] <- Branched
only_MR$Basal[i] <- Basal
only_MR$Apical_perc[i] <- (Apical / MR_length)
only_MR$Branched_perc[i] <- (Branched / MR_length)
only_MR$Basal_perc[i] <- (Basal / MR_length)
super_temp$LR_in_10_perc <- 0
super_temp$LR_in_20_perc <- 0
super_temp$LR_in_30_perc <- 0
super_temp$LR_in_40_perc <- 0
super_temp$LR_in_50_perc <- 0
super_temp$LR_in_60_perc <- 0
super_temp$LR_in_70_perc <- 0
super_temp$LR_in_80_perc <- 0
super_temp$LR_in_90_perc <- 0
super_temp$LR_in_100_perc <- 0
super_temp$LR_in_25_perc_4 <- 0
super_temp$LR_in_50_perc_4 <- 0
super_temp$LR_in_75_perc_4 <- 0
super_temp$LR_in_100_perc_4 <- 0
for(e in 1:nrow(super_temp)){
if (super_temp$insertion_position[e] < (MR_length/10)){
super_temp$LR_in_10_perc[e] <- 1
} else {super_temp$LR_in_10_perc[e] <- 0}
if (super_temp$insertion_position[e] < 2*(MR_length/10) & super_temp$insertion_position[e] > (MR_length/10)){
super_temp$LR_in_20_perc[e] <- 1
} else {super_temp$LR_in_20_perc[e] <- 0}
if (super_temp$insertion_position[e] < 3*(MR_length/10) & super_temp$insertion_position[e] > 2*(MR_length/10)){
super_temp$LR_in_30_perc[e] <- 1
} else {super_temp$LR_in_30_perc[e] <- 0}
if (super_temp$insertion_position[e] < 4*(MR_length/10) & super_temp$insertion_position[e] > 3*(MR_length/10)){
super_temp$LR_in_40_perc[e] <- 1
} else {super_temp$LR_in_40_perc[e] <- 0}
if (super_temp$insertion_position[e] < 5*(MR_length/10) & super_temp$insertion_position[e] > 4*(MR_length/10)){
super_temp$LR_in_50_perc[e] <- 1
} else {super_temp$LR_in_50_perc[e] <- 0}
if (super_temp$insertion_position[e] < 6*(MR_length/10) & super_temp$insertion_position[e] > 5*(MR_length/10)){
super_temp$LR_in_60_perc[e] <- 1
} else {super_temp$LR_in_60_perc[e] <- 0}
if (super_temp$insertion_position[e] < 7*(MR_length/10) & super_temp$insertion_position[e] > 6*(MR_length/10)){
super_temp$LR_in_70_perc[e] <- 1
} else {super_temp$LR_in_70_perc[e] <- 0}
if (super_temp$insertion_position[e] < 8*(MR_length/10) & super_temp$insertion_position[e] > 7*(MR_length/10)){
super_temp$LR_in_80_perc[e] <- 1
} else {super_temp$LR_in_80_perc[e] <- 0}
if (super_temp$insertion_position[e] < 9*(MR_length/10) & super_temp$insertion_position[e] > 8*(MR_length/10)){
super_temp$LR_in_90_perc[e] <- 1
} else {super_temp$LR_in_90_perc[e] <- 0}
if (super_temp$insertion_position[e] < 10*(MR_length/10) & super_temp$insertion_position[e] > 9*(MR_length/10)){
super_temp$LR_in_100_perc[e] <- 1
} else {super_temp$LR_in_100_perc[e] <- 0}
if (super_temp$insertion_position[e] < 1*(MR_length/4)){
super_temp$LR_in_25_perc_4[e] <- 1
} else {super_temp$LR_in_25_perc_4[e] <- 0}
if (super_temp$insertion_position[e] < 2*(MR_length/4) & super_temp$insertion_position[e] > 1*(MR_length/4)){
super_temp$LR_in_50_perc_4[e] <- 1
} else {super_temp$LR_in_50_perc_4[e] <- 0}
if (super_temp$insertion_position[e] < 3*(MR_length/4) & super_temp$insertion_position[e] > 2*(MR_length/4)){
super_temp$LR_in_75_perc_4[e] <- 1
} else {super_temp$LR_in_75_perc_4[e] <- 0}
if (super_temp$insertion_position[e] < 4*(MR_length/4) & super_temp$insertion_position[e] > 3*(MR_length/4)){
super_temp$LR_in_100_perc_4[e] <- 1
} else {super_temp$LR_in_100_perc_4[e] <- 0}
}
super_temp$LRL_in_10_perc <- super_temp$length * super_temp$LR_in_10_perc
super_temp$LRL_in_20_perc <- super_temp$length * super_temp$LR_in_20_perc
super_temp$LRL_in_30_perc <- super_temp$length * super_temp$LR_in_30_perc
super_temp$LRL_in_40_perc <- super_temp$length * super_temp$LR_in_40_perc
super_temp$LRL_in_50_perc <- super_temp$length * super_temp$LR_in_50_perc
super_temp$LRL_in_60_perc <- super_temp$length * super_temp$LR_in_60_perc
super_temp$LRL_in_70_perc <- super_temp$length * super_temp$LR_in_70_perc
super_temp$LRL_in_80_perc <- super_temp$length * super_temp$LR_in_80_perc
super_temp$LRL_in_90_perc <- super_temp$length * super_temp$LR_in_90_perc
super_temp$LRL_in_100_perc <- super_temp$length * super_temp$LR_in_100_perc
super_temp$LRL_in_25_perc_4 <- super_temp$length * super_temp$LR_in_25_perc_4
super_temp$LRL_in_50_perc_4 <- super_temp$length * super_temp$LR_in_50_perc_4
super_temp$LRL_in_75_perc_4 <- super_temp$length * super_temp$LR_in_75_perc_4
super_temp$LRL_in_100_perc_4 <- super_temp$length * super_temp$LR_in_100_perc_4
only_MR$LR_no_10_100[i] <- sum(super_temp$LR_in_10_perc)
only_MR$LR_no_20_100[i] <- sum(super_temp$LR_in_20_perc)
only_MR$LR_no_30_100[i] <- sum(super_temp$LR_in_30_perc)
only_MR$LR_no_40_100[i] <- sum(super_temp$LR_in_40_perc)
only_MR$LR_no_50_100[i] <- sum(super_temp$LR_in_50_perc)
only_MR$LR_no_60_100[i] <- sum(super_temp$LR_in_60_perc)
only_MR$LR_no_70_100[i] <- sum(super_temp$LR_in_70_perc)
only_MR$LR_no_80_100[i] <- sum(super_temp$LR_in_80_perc)
only_MR$LR_no_90_100[i] <- sum(super_temp$LR_in_90_perc)
only_MR$LR_no_100_100[i] <- sum(super_temp$LR_in_100_perc)
only_MR$LR_no_25_4[i] <- sum(super_temp$LR_in_25_perc_4)
only_MR$LR_no_50_4[i] <- sum(super_temp$LR_in_50_perc_4)
only_MR$LR_no_75_4[i] <- sum(super_temp$LR_in_75_perc_4)
only_MR$LR_no_100_4[i] <- sum(super_temp$LR_in_100_perc_4)
only_MR$LRL_10_100[i] <- sum(super_temp$LRL_in_10_perc)
only_MR$LRL_20_100[i] <- sum(super_temp$LRL_in_20_perc)
only_MR$LRL_30_100[i] <- sum(super_temp$LRL_in_30_perc)
only_MR$LRL_40_100[i] <- sum(super_temp$LRL_in_40_perc)
only_MR$LRL_50_100[i] <- sum(super_temp$LRL_in_50_perc)
only_MR$LRL_60_100[i] <- sum(super_temp$LRL_in_60_perc)
only_MR$LRL_70_100[i] <- sum(super_temp$LRL_in_70_perc)
only_MR$LRL_80_100[i] <- sum(super_temp$LRL_in_80_perc)
only_MR$LRL_90_100[i] <- sum(super_temp$LRL_in_90_perc)
only_MR$LRL_100_100[i] <- sum(super_temp$LRL_in_100_perc)
only_MR$LRL_25_4[i] <- sum(super_temp$LRL_in_25_perc_4)
only_MR$LRL_50_4[i] <- sum(super_temp$LRL_in_50_perc_4)
only_MR$LRL_75_4[i] <- sum(super_temp$LRL_in_75_perc_4)
only_MR$LRL_100_4[i] <- sum(super_temp$LRL_in_100_perc_4)
super_temp$momentum <- (super_temp$insertion_position * super_temp$length)
all_momentum <- sum(super_temp$momentum)
all_length <- sum(super_temp$length)
only_MR$CoG[i] <- (all_momentum / all_length)
}
let’s have a look if all looks good:
head(only_MR)
then- we need to fuse all the main roots that dont own LR
Make sure that the data structure is the same as for the only_MR file
head(no_LR_MR)
no_LR_MR <- no_LR_MR[,c(1:4)]
add all the columns that are missing
no_LR_MR$Apical <- 0
no_LR_MR$Branched <- 0
no_LR_MR$Basal <- 0
no_LR_MR$Apical_perc <- 0
no_LR_MR$Branched_perc <- 0
no_LR_MR$Basal_perc <- 0
no_LR_MR$LR_no_10_100 <- 0
no_LR_MR$LR_no_20_100 <- 0
no_LR_MR$LR_no_30_100 <- 0
no_LR_MR$LR_no_40_100 <- 0
no_LR_MR$LR_no_50_100 <- 0
no_LR_MR$LR_no_60_100 <- 0
no_LR_MR$LR_no_70_100 <- 0
no_LR_MR$LR_no_80_100 <- 0
no_LR_MR$LR_no_90_100 <- 0
no_LR_MR$LR_no_100_100 <- 0
no_LR_MR$LR_no_25_4 <- 0
no_LR_MR$LR_no_50_4 <- 0
no_LR_MR$LR_no_75_4 <- 0
no_LR_MR$LR_no_100_4 <- 0
no_LR_MR$LRL_10_100 <- 0
no_LR_MR$LRL_20_100 <- 0
no_LR_MR$LRL_30_100 <- 0
no_LR_MR$LRL_40_100 <- 0
no_LR_MR$LRL_50_100 <- 0
no_LR_MR$LRL_60_100 <- 0
no_LR_MR$LRL_70_100 <- 0
no_LR_MR$LRL_80_100 <- 0
no_LR_MR$LRL_90_100 <- 0
no_LR_MR$LRL_100_100 <- 0
no_LR_MR$LRL_25_4 <- 0
no_LR_MR$LRL_50_4 <- 0
no_LR_MR$LRL_75_4 <- 0
no_LR_MR$LRL_100_4 <- 0
no_LR_MR$CoG <- 0
finally - let’s fuse the two files together:
all_MR <- rbind(only_MR, no_LR_MR)
head(all_MR)
tail(all_MR)
All the root ID and genotype information is linked in the folder AllMarks_DataInSegment/*_010_DataSegments.csv files
let-s import them first
coding_1 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G1_010_DataSegments.csv")
coding_1$experiment <- "G1"
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G2_010_DataSegments.csv")
coding_2$experiment <- "G2"
all_coding <- rbind(coding_1, coding_2)
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G3_010_DataSegments.csv")
coding_2$experiment <- "G3"
all_coding <- rbind(all_coding, coding_2)
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G4_010_DataSegments.csv")
coding_2$experiment <- "G4"
all_coding <- rbind(all_coding, coding_2)
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G5_010_DataSegments.csv")
coding_2$experiment <- "G5"
all_coding <- rbind(all_coding, coding_2)
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G6_010_DataSegments.csv")
coding_2$experiment <- "G6"
all_coding <- rbind(all_coding, coding_2)
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G7_010_DataSegments.csv")
coding_2$experiment <- "G7"
all_coding <- rbind(all_coding, coding_2)
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G8_010_DataSegments.csv")
coding_2$experiment <- "G8"
all_coding <- rbind(all_coding, coding_2)
Then - let’s have a look at the files:
head(all_coding)
we basically need image name, root, Genotype and Treatment collumns to be able to link it to our existing data in MR_only
colnames(all_coding)
## [1] "X" "image" "source"
## [4] "root" "root_name" "mark_type"
## [7] "position_from_base" "diameter" "angle"
## [10] "x" "y" "root_order"
## [13] "root_ontology" "value" "Date"
## [16] "Day" "Plate" "ontology"
## [19] "Root" "Genotype" "Treatment"
## [22] "experiment"
all_coding2 <- all_coding[,c(2,4,20:22)]
head(all_coding2)
all_coding3 <- unique(all_coding2)
all_coding3
OK - now let’s match the all_coding3 file with all_MR file into one:
uncoded_total <- merge(all_coding3, all_MR, by=c("image", "root", "experiment"))
head(uncoded_total)
Let’s check if there is anything missing
dim(all_coding3)
## [1] 14736 5
dim(all_MR)
## [1] 1360 39
dim(uncoded_total)
## [1] 1360 41
Now - lets uncode the genotypes
geno_coding <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Data_Safarina/GlobalRootData/20191031_geno_coding.csv")
geno_coding$genotypes <- gsub(" ", "", geno_coding$genotypes, fixed = TRUE)
head(geno_coding)
and fuse the genotype information into the entire table:
colnames(uncoded_total)[4] <- "genotypes"
final_decoded <- merge(geno_coding, uncoded_total, by="genotypes", all=T)
head(final_decoded)
write.csv(final_decoded, "LR_distribution_final_set.csv", row.names = F)
let’s visualize the population wide trend:
AP_histo <- gghistogram(final_decoded, x = "Apical",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Apical Zone length (cm)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
AP_histo
BZ_histo <- gghistogram(final_decoded, x = "Branched",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Branched Zone length (cm)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
BZ_histo
BS_histo <- gghistogram(final_decoded, x = "Basal",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Basal Zone length (cm)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
BS_histo
now - let’s have a look at how these trends look when we calculate all the zones as % of the MR:
AP_perc_histo <- gghistogram(final_decoded, x = "Apical_perc",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Apical Zone length (fraction of MRL)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
AP_perc_histo
BZ_perc_histo <- gghistogram(final_decoded, x = "Branched_perc",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Branched Zone length (fraction of MRL)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
BZ_perc_histo
BS_perc_histo <- gghistogram(final_decoded, x = "Basal_perc",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Basal Zone length (fraction of MRL)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
BS_perc_histo
Let’s have a look at Center of Gravity
CoG_histo <- gghistogram(final_decoded, x = "CoG",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Center of Gravity (position on MR in cm)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
CoG_histo
how about if we look at CoG as % of MR
final_decoded$CoG_perc <- final_decoded$CoG/final_decoded$length
CoG_perc_histo <- gghistogram(final_decoded, x = "CoG_perc",
add = "mean", rug = TRUE,
color = "Treatment", fill = "Treatment",
palette = c("#00AFBB", "#E7B800")) + xlab("Center of Gravity (fraction of MR length)")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
CoG_perc_histo
## Warning: Removed 2 rows containing non-finite values (stat_bin).
Cool - all of the above graphs look pretty cool - so let’s combine them into two separatee figures:
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.18_histo_zones_cm.pdf", width=8, height=6)
plot_grid(AP_histo, BZ_histo, BS_histo, CoG_histo,
ncol = 2, align = "hv", labels=c("AUTO"),
label_size = 14)
dev.off()
## quartz_off_screen
## 2
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.19_histo_zones_perc.pdf", width=8, height=6)
plot_grid(AP_perc_histo, BZ_perc_histo, BS_perc_histo, CoG_perc_histo,
ncol = 2, align = "hv", labels=c("AUTO"),
label_size = 14)
## Warning: Removed 2 rows containing non-finite values (stat_bin).
dev.off()
## quartz_off_screen
## 2
Let’s reshape the data and have a look at the correlations between accessions:
Then - how do we visualize LR distribution? Maybe - let’s try to melt the dataset containing only the LR distribution - let’s first try with MR / 4 distribution of LR #:
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
new_temp <- final_decoded[,c(2,6:7,25:28)]
head(new_temp)
colnames(new_temp)[4] <- "0-25%"
colnames(new_temp)[5] <- "25-50%"
colnames(new_temp)[6] <- "50-75%"
colnames(new_temp)[7] <- "75-100%"
mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)
LRno_25_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment",
facet.by = "variable", ncol=4, palette = c("#00AFBB", "#E7B800"),
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="# LR per MR", title = "fraction of MR")
LRno_25_perc_plot <- LRno_25_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM",
label = "p.signif", hide.ns = T)
LRno_25_perc_plot
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
new_temp <- final_decoded[,c(2,6:7,39:42)]
head(new_temp)
colnames(new_temp)[4] <- "0-25%"
colnames(new_temp)[5] <- "25-50%"
colnames(new_temp)[6] <- "50-75%"
colnames(new_temp)[7] <- "75-100%"
mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)
LRL_25_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment",
facet.by = "variable", ncol=4, palette = c("#00AFBB", "#E7B800"),
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Lateral Root Length (cm)", title = "fraction of MR")
LRL_25_perc_plot <- LRL_25_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM",
label = "p.signif", hide.ns = T)
LRL_25_perc_plot
OK - these graphs look pretty cool and make sense - lets have a look at how it looks in 10% fractions of MR:
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
new_temp <- final_decoded[,c(2,6:7,15:24)]
head(new_temp)
colnames(new_temp)[4] <- "0-10%"
colnames(new_temp)[5] <- "10-20%"
colnames(new_temp)[6] <- "20-30%"
colnames(new_temp)[7] <- "30-40%"
colnames(new_temp)[8] <- "40-50%"
colnames(new_temp)[9] <- "50-60%"
colnames(new_temp)[10] <- "60-70%"
colnames(new_temp)[11] <- "70-80%"
colnames(new_temp)[12] <- "80-90%"
colnames(new_temp)[13] <- "90-100%"
mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)
LRno_10_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment",
facet.by = "variable", ncol=10, palette = c("#00AFBB", "#E7B800"),
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="# LR per MR", title = "fraction of MR")
LRno_10_perc_plot <- LRno_10_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM",
label = "p.signif", hide.ns = T)+ rotate_x_text(90)
LRno_10_perc_plot
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
new_temp <- final_decoded[,c(2,6:7,29:38)]
head(new_temp)
colnames(new_temp)[4] <- "0-10%"
colnames(new_temp)[5] <- "10-20%"
colnames(new_temp)[6] <- "20-30%"
colnames(new_temp)[7] <- "30-40%"
colnames(new_temp)[8] <- "40-50%"
colnames(new_temp)[9] <- "50-60%"
colnames(new_temp)[10] <- "60-70%"
colnames(new_temp)[11] <- "70-80%"
colnames(new_temp)[12] <- "80-90%"
colnames(new_temp)[13] <- "90-100%"
mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)
LRL_10_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment",
facet.by = "variable", ncol=10, palette = c("#00AFBB", "#E7B800"),
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Lateral Root Length (cm)", title = "fraction of MR")
LRL_10_perc_plot <- LRL_10_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM",
label = "p.signif", hide.ns = T) + rotate_x_text(90)
LRL_10_perc_plot
Cool - again - above graphs look pretty cool - so let’s combine them into one figure:
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.20_LR_distribution_25perc.pdf", width=8, height=8)
plot_grid(LRno_25_perc_plot, LRL_25_perc_plot,
ncol = 1, align = "hv", labels=c("AUTO"),
label_size = 14)
dev.off()
## quartz_off_screen
## 2
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.21_LR_distribution_10perc.pdf", width=10, height=8)
plot_grid(LRno_10_perc_plot, LRL_10_perc_plot,
ncol = 1, align = "hv", labels=c("AUTO"),
label_size = 14)
dev.off()
## quartz_off_screen
## 2
grand.means <- aggregate(CoG ~ Treatment, data = final_decoded, FUN = mean)
grand.means
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
CoG_p_exp <- ggerrorplot(final_decoded, y = "CoG", x = "experiment", fill="experiment", color="experiment",
facet.by = "Treatment", ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="Experiment", ylab="Center of Gravity (MR cm)")
CoG_p_exp <- CoG_p_exp + geom_hline(
data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment")
CoG_p_exp <- CoG_p_exp + rremove("legend") + stat_compare_means(method="t.test", ref.group = ".all.",
label = "p.signif", hide.ns = T)
CoG_p_exp
Since we have already done this for MR traits - I am not really going into more depth in here - I think the differences we see are mainly due to composition of accessions rather than some random effect
We have to first subset things per experiment:
final1 <- subset(final_decoded, final_decoded$experiment == "G1")
final2 <- subset(final_decoded, final_decoded$experiment == "G2")
final3 <- subset(final_decoded, final_decoded$experiment == "G3")
final4 <- subset(final_decoded, final_decoded$experiment == "G4")
final5 <- subset(final_decoded, final_decoded$experiment == "G5")
final6 <- subset(final_decoded, final_decoded$experiment == "G6")
final7 <- subset(final_decoded, final_decoded$experiment == "G7")
final8 <- subset(final_decoded, final_decoded$experiment == "G8")
grand.means <- aggregate(CoG ~ Treatment, data = final1, FUN = mean)
CoG_rep_exp1 <- ggerrorplot(final1, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp1 <- CoG_rep_exp1 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp1 <- CoG_rep_exp1 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp1
grand.means <- aggregate(CoG ~ Treatment, data = final2, FUN = mean)
CoG_rep_exp2 <- ggerrorplot(final2, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp2 <- CoG_rep_exp2 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp2 <- CoG_rep_exp2 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp2
grand.means <- aggregate(CoG ~ Treatment, data = final3, FUN = mean)
CoG_rep_exp3 <- ggerrorplot(final3, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp3 <- CoG_rep_exp3 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp3 <- CoG_rep_exp3 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp3
## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations
## Warning: Removed 1 rows containing missing values (geom_segment).
grand.means <- aggregate(CoG ~ Treatment, data = final4, FUN = mean)
CoG_rep_exp4 <- ggerrorplot(final4, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp4 <- CoG_rep_exp4 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp4 <- CoG_rep_exp4 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp4
grand.means <- aggregate(CoG ~ Treatment, data = final5, FUN = mean)
CoG_rep_exp5 <- ggerrorplot(final5, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp5 <- CoG_rep_exp5 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp5 <- CoG_rep_exp5 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp5
grand.means <- aggregate(CoG ~ Treatment, data = final6, FUN = mean)
CoG_rep_exp6 <- ggerrorplot(final6, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp6 <- CoG_rep_exp6 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp6 <- CoG_rep_exp6 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp6
grand.means <- aggregate(CoG ~ Treatment, data = final7, FUN = mean)
CoG_rep_exp7 <- ggerrorplot(final7, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp7 <- CoG_rep_exp7 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp7 <- CoG_rep_exp7 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp7
grand.means <- aggregate(CoG ~ Treatment, data = final8, FUN = mean)
CoG_rep_exp8 <- ggerrorplot(final8, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
facet.by = c("Treatment"), ncol=1,
desc_stat = "mean_sd", add = "jitter",
add.params = list(color = "darkgray"),
xlab="", ylab="Center of Gravity (cm of MR)")
CoG_rep_exp8 <- CoG_rep_exp8 + stat_compare_means(method="t.test",
ref.group = ".all.", label = "p.signif",
hide.ns = T) + rremove("legend")
CoG_rep_exp8 <- CoG_rep_exp8 + geom_hline(data = grand.means, aes(yintercept = CoG),
linetype = 2,
group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp8
## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations
## Warning: Removed 1 rows containing missing values (geom_segment).
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.22_All_acc_check_CoG_per_exp.pdf", width=16, height=23)
plot_grid(CoG_rep_exp1, CoG_rep_exp2, CoG_rep_exp3, CoG_rep_exp4, CoG_rep_exp5, CoG_rep_exp6,
CoG_rep_exp7, CoG_rep_exp8,
ncol = 2, align = "hv", labels=c("AUTO"),
label_size = 14)
## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations
## Warning: Removed 1 rows containing missing values (geom_segment).
dev.off()
## quartz_off_screen
## 2
I also don’t thing that visualizing the distribution of LR in these kind of graphs make much sense. Thus…. Let’s summarize the data per accession and then have a look at correlation between the traits and between conditions
Just like in previous notebook - I would like to remove one genotype called “T102964” it is only screened in exp. GA6 and consist only of one plant scored at salt while no plants were scored at control conditions
final_decoded$Identifier <- as.character(final_decoded$Identifier)
dim(final_decoded)
## [1] 1360 44
bye <- c("T102964 ")
final_decoded <- subset(final_decoded, !(final_decoded$Identifier %in% bye))
dim(final_decoded)
## [1] 1355 44
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
unique(final_decoded$Identifier)
## [1] "CR001 " "CR077 " "CR078 "
## [4] "CR079 " "CR081 " "CR082 "
## [7] "CR083 " "CR084 " "CR085 "
## [10] "CR086 " "CR088 " "CR028 "
## [13] "LA0147 " "CR049 " "CR056 "
## [16] "CR058 " "CR064 " "CR073 "
## [19] "CR075 " "CR076 " "CR089 "
## [22] "CR103 " "CR104 " "CR105 "
## [25] "CR106 " "CR107 " "CR108 "
## [28] "CR114 " "CR110 " "CR115 "
## [31] "CR117 " "CR092 " "CR288 "
## [34] "CR093 " "CR095 " "CR097 "
## [37] "CR098 " "CR099 " "CR100 "
## [40] "CR101 " "CR118 " "CR144 "
## [43] "CR145 " "CR147 " "CR148 "
## [46] "CR149 " "CR152 " "CR153 "
## [49] "CR154 " "CR155 " "CR156 "
## [52] "CR120 " "CR122 " "CR123 "
## [55] "CR124 " "CR125 " "CR130 "
## [58] "CR141 " "CR142 " "CR157 "
## [61] "CR205 " "CR236 " "CR238 "
## [64] "CR240 " "CR241 " "CR242 "
## [67] "CR246 " "CR248 " "CR250 "
## [70] "CR258 " "CR158 " "CR161 "
## [73] "CR166 " "CR179 " "CR181 "
## [76] "CR199 " "CR202 " "CR203 "
## [79] "CR263 " "CR287 " "CR289 "
## [82] "CR290 " "CR291 " "CR292 "
## [85] "CR293 " "T102068 " "T102950 "
## [88] "T102957 " "CR264 " "CR271 "
## [91] "CR274 " "CR275 " "CR276 "
## [94] "CR282 " "CR283 " "CR286 "
## [97] "T102958 " "T102968 " "T102969 "
## [100] "T102970 " "T102971 " "T102972 "
## [103] "T102973 " "T102974 " "T102975 "
## [106] "T102977 " "T102978 " "T102959 "
## [109] "T102961 " "T102962 " "T102963 "
## [112] "T102965 " "T102966 " "T102967 "
## [115] "T102979 " "BGV5895 " "BGV6071 "
## [118] "BGV6767 " "BGV6769 " "BGV6827 "
## [121] "BGV6931 " "BGV7942 " "BGV7979 "
## [124] "BGV8036 " "BGV8096 " "T102980 "
## [127] "T102982 " "T102983 " "T102984 "
## [130] "BGV12627 " "BGV13157 " "BGV13161 "
## [133] "BGV13945 " "BGV8109 " "BGV8219 "
## [136] "BGV8221 " "BGV8228 " "BGV8230 "
## [139] "Ferum " "Stupicke Polni Rane "
now - lets calculate mean trait value for each accession for all of our traits
colnames(final_decoded)
## [1] "genotypes" "Identifier" "Accession" "image"
## [5] "root" "experiment" "Treatment" "length"
## [9] "Apical" "Branched" "Basal" "Apical_perc"
## [13] "Branched_perc" "Basal_perc" "LR_no_10_100" "LR_no_20_100"
## [17] "LR_no_30_100" "LR_no_40_100" "LR_no_50_100" "LR_no_60_100"
## [21] "LR_no_70_100" "LR_no_80_100" "LR_no_90_100" "LR_no_100_100"
## [25] "LR_no_25_4" "LR_no_50_4" "LR_no_75_4" "LR_no_100_4"
## [29] "LRL_10_100" "LRL_20_100" "LRL_30_100" "LRL_40_100"
## [33] "LRL_50_100" "LRL_60_100" "LRL_70_100" "LRL_80_100"
## [37] "LRL_90_100" "LRL_100_100" "LRL_25_4" "LRL_50_4"
## [41] "LRL_75_4" "LRL_100_4" "CoG" "CoG_perc"
geno.means <- summaryBy(Apical + Branched + Basal + Apical_perc + Branched_perc + Basal_perc + LR_no_10_100 + LR_no_20_100 + LR_no_30_100 + LR_no_40_100 + LR_no_50_100 + LR_no_60_100 + LR_no_70_100 + LR_no_80_100 + LR_no_90_100 + LR_no_100_100 + LR_no_25_4 + LR_no_50_4 + LR_no_75_4 + LR_no_100_4 + LRL_10_100 + LRL_20_100 + LRL_30_100 + LRL_40_100 + LRL_50_100 + LRL_60_100 + LRL_70_100 + LRL_80_100 + LRL_90_100 + LRL_100_100 + LRL_25_4 + LRL_50_4 + LRL_75_4 + LRL_100_4 + CoG + CoG_perc ~ Treatment + Identifier, data = final_decoded)
geno.means.c <- subset(geno.means, geno.means$Treatment == "0mM")
head(geno.means.c)
geno.means.s <- subset(geno.means, geno.means$Treatment == "125mM")
head(geno.means.s)
Let’s use only the informative collumns and re-name them into “Control” > c and “Salt” > s:
colnames(geno.means.c) <- gsub(".mean", ".C", colnames(geno.means.c))
colnames(geno.means.s) <- gsub(".mean", ".S", colnames(geno.means.s))
colnames(geno.means.c)
## [1] "Treatment" "Identifier" "Apical.C" "Branched.C"
## [5] "Basal.C" "Apical_perc.C" "Branched_perc.C" "Basal_perc.C"
## [9] "LR_no_10_100.C" "LR_no_20_100.C" "LR_no_30_100.C" "LR_no_40_100.C"
## [13] "LR_no_50_100.C" "LR_no_60_100.C" "LR_no_70_100.C" "LR_no_80_100.C"
## [17] "LR_no_90_100.C" "LR_no_100_100.C" "LR_no_25_4.C" "LR_no_50_4.C"
## [21] "LR_no_75_4.C" "LR_no_100_4.C" "LRL_10_100.C" "LRL_20_100.C"
## [25] "LRL_30_100.C" "LRL_40_100.C" "LRL_50_100.C" "LRL_60_100.C"
## [29] "LRL_70_100.C" "LRL_80_100.C" "LRL_90_100.C" "LRL_100_100.C"
## [33] "LRL_25_4.C" "LRL_50_4.C" "LRL_75_4.C" "LRL_100_4.C"
## [37] "CoG.C" "CoG_perc.C"
colnames(geno.means.s)
## [1] "Treatment" "Identifier" "Apical.S" "Branched.S"
## [5] "Basal.S" "Apical_perc.S" "Branched_perc.S" "Basal_perc.S"
## [9] "LR_no_10_100.S" "LR_no_20_100.S" "LR_no_30_100.S" "LR_no_40_100.S"
## [13] "LR_no_50_100.S" "LR_no_60_100.S" "LR_no_70_100.S" "LR_no_80_100.S"
## [17] "LR_no_90_100.S" "LR_no_100_100.S" "LR_no_25_4.S" "LR_no_50_4.S"
## [21] "LR_no_75_4.S" "LR_no_100_4.S" "LRL_10_100.S" "LRL_20_100.S"
## [25] "LRL_30_100.S" "LRL_40_100.S" "LRL_50_100.S" "LRL_60_100.S"
## [29] "LRL_70_100.S" "LRL_80_100.S" "LRL_90_100.S" "LRL_100_100.S"
## [33] "LRL_25_4.S" "LRL_50_4.S" "LRL_75_4.S" "LRL_100_4.S"
## [37] "CoG.S" "CoG_perc.S"
geno.means.c <- geno.means.c[,c(2:38)]
geno.means.s <- geno.means.s[,c(2:38)]
Then - we fuse the avg trait values from both conditions together in one file and calculate STI for individual traits of interest:
STI <- merge(geno.means.c, geno.means.s, by="Identifier")
head(STI)
colnames(STI)
## [1] "Identifier" "Apical.C" "Branched.C" "Basal.C"
## [5] "Apical_perc.C" "Branched_perc.C" "Basal_perc.C" "LR_no_10_100.C"
## [9] "LR_no_20_100.C" "LR_no_30_100.C" "LR_no_40_100.C" "LR_no_50_100.C"
## [13] "LR_no_60_100.C" "LR_no_70_100.C" "LR_no_80_100.C" "LR_no_90_100.C"
## [17] "LR_no_100_100.C" "LR_no_25_4.C" "LR_no_50_4.C" "LR_no_75_4.C"
## [21] "LR_no_100_4.C" "LRL_10_100.C" "LRL_20_100.C" "LRL_30_100.C"
## [25] "LRL_40_100.C" "LRL_50_100.C" "LRL_60_100.C" "LRL_70_100.C"
## [29] "LRL_80_100.C" "LRL_90_100.C" "LRL_100_100.C" "LRL_25_4.C"
## [33] "LRL_50_4.C" "LRL_75_4.C" "LRL_100_4.C" "CoG.C"
## [37] "CoG_perc.C" "Apical.S" "Branched.S" "Basal.S"
## [41] "Apical_perc.S" "Branched_perc.S" "Basal_perc.S" "LR_no_10_100.S"
## [45] "LR_no_20_100.S" "LR_no_30_100.S" "LR_no_40_100.S" "LR_no_50_100.S"
## [49] "LR_no_60_100.S" "LR_no_70_100.S" "LR_no_80_100.S" "LR_no_90_100.S"
## [53] "LR_no_100_100.S" "LR_no_25_4.S" "LR_no_50_4.S" "LR_no_75_4.S"
## [57] "LR_no_100_4.S" "LRL_10_100.S" "LRL_20_100.S" "LRL_30_100.S"
## [61] "LRL_40_100.S" "LRL_50_100.S" "LRL_60_100.S" "LRL_70_100.S"
## [65] "LRL_80_100.S" "LRL_90_100.S" "LRL_100_100.S" "LRL_25_4.S"
## [69] "LRL_50_4.S" "LRL_75_4.S" "LRL_100_4.S" "CoG.S"
## [73] "CoG_perc.S"
# OK - let's think which traits make sense to calculate STI?
# Zones
STI$Apical.STI <- STI$Apical.S/STI$Apical.C
STI$Branched.STI <- STI$Branched.S/STI$Branched.C
STI$Basal.STI <- STI$Basal.S/STI$Basal.C
STI$Apical_perc.STI <- STI$Apical_perc.S/STI$Apical_perc.C
STI$Branched_perc.STI <- STI$Branched_perc.S/STI$Branched_perc.C
STI$Basal_perc.STI <- STI$Basal_perc.S/STI$Basal_perc.C
# CoG cm and CoG as percentage
STI$CoG.STI <- STI$CoG.S/STI$CoG.C
STI$CoG_perc.STI <- STI$CoG_perc.S/STI$CoG_perc.C
head(STI)
# Let's have a look how these graphs look like:
# Apical Zone
my_line <- median(STI$Apical.STI)
STI_graph_Apical <- ggplot(STI, aes(x = Identifier, y = Apical.STI))
STI_graph_Apical <- STI_graph_Apical + geom_bar(stat = "identity")
STI_graph_Apical <- STI_graph_Apical + xlab("") + ylab("Fraction of Control (Apical zone length)")
STI_graph_Apical <- STI_graph_Apical + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Apical <- STI_graph_Apical + theme_classic()
STI_graph_Apical <- STI_graph_Apical + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Apical
# Branched Zone
my_line <- median(STI$Branched.STI)
STI_graph_Branched <- ggplot(STI, aes(x = Identifier, y = Branched.STI))
STI_graph_Branched <- STI_graph_Branched + geom_bar(stat = "identity")
STI_graph_Branched <- STI_graph_Branched + xlab("") + ylab("Fraction of Control (Branched zone length)")
STI_graph_Branched <- STI_graph_Branched + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Branched <- STI_graph_Branched + theme_classic()
STI_graph_Branched <- STI_graph_Branched + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Branched
# Basal Zone
my_line <- median(STI$Basal.STI)
STI_graph_Basal <- ggplot(STI, aes(x = Identifier, y = Basal.STI))
STI_graph_Basal <- STI_graph_Basal + geom_bar(stat = "identity")
STI_graph_Basal <- STI_graph_Basal + xlab("") + ylab("Fraction of Control (Basal zone length)")
STI_graph_Basal <- STI_graph_Basal + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Basal <- STI_graph_Basal + theme_classic()
STI_graph_Basal <- STI_graph_Basal + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Basal
# Center of Gravity
my_line <- median(STI$CoG.STI)
STI_graph_CoG <- ggplot(STI, aes(x = Identifier, y = CoG.STI))
STI_graph_CoG <- STI_graph_CoG + geom_bar(stat = "identity")
STI_graph_CoG <- STI_graph_CoG + xlab("") + ylab("Fraction of Control (Center of Gravity)")
STI_graph_CoG <- STI_graph_CoG + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_CoG <- STI_graph_CoG + theme_classic()
STI_graph_CoG <- STI_graph_CoG + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_CoG
I think it would be good to combine this data with the MR data analyzed previously - and see how the changes in zonation and CoG are comparing with the absolute seedling’s root size
MR_data <- read.csv("GWAS_ready.csv")
head(MR_data)
MR_data$class <- ""
for(i in 1:nrow(MR_data)){
if(MR_data$TRS_STI[i] < 0.5109159){
MR_data$class[i] <- "super-bad"}
if(MR_data$TRS_STI[i] > 0.5109159 & MR_data$TRS_STI[i] < 0.6815476){
MR_data$class[i] <- "bad"}
if(MR_data$TRS_STI[i] > 0.6815476 & MR_data$TRS_STI[i] < 0.8521793){
MR_data$class[i] <- "good"}
if(MR_data$TRS_STI[i] > 0.8521793){
MR_data$class[i] <- "kick-ass"}
}
head(MR_data)
MR_data <- MR_data[,c(2:38)]
colnames(MR_data)[1] <- "Identifier"
all <- merge(MR_data, STI)
head(all)
all$class <- factor(all$class, levels=c("super-bad", "bad", "good", "kick-ass"))
all$Identifier = with(all, reorder(Identifier, TRS_STI, median))
Then - let’s re-plot again all the graphs from above:
# Apical Zone
my_line <- median(all$Apical.STI)
STI_graph_Apical <- ggplot(all, aes(x = Identifier, y = Apical.STI, fill = class))
STI_graph_Apical <- STI_graph_Apical + geom_bar(stat = "identity")
STI_graph_Apical <- STI_graph_Apical + xlab("") + ylab("Fraction of Control (Apical zone length)")
STI_graph_Apical <- STI_graph_Apical + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Apical <- STI_graph_Apical + theme_classic()
STI_graph_Apical <- STI_graph_Apical + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Apical
# Branched Zone
my_line <- median(all$Branched.STI)
STI_graph_Branched <- ggplot(all, aes(x = Identifier, y = Branched.STI, fill = class))
STI_graph_Branched <- STI_graph_Branched + geom_bar(stat = "identity")
STI_graph_Branched <- STI_graph_Branched + xlab("") + ylab("Fraction of Control (Branched zone length)")
STI_graph_Branched <- STI_graph_Branched + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Branched <- STI_graph_Branched + theme_classic()
STI_graph_Branched <- STI_graph_Branched + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Branched
# Basal Zone
my_line <- median(all$Basal.STI)
STI_graph_Basal <- ggplot(all, aes(x = Identifier, y = Basal.STI, fill = class))
STI_graph_Basal <- STI_graph_Basal + geom_bar(stat = "identity")
STI_graph_Basal <- STI_graph_Basal + xlab("") + ylab("Fraction of Control (Basal zone length)")
STI_graph_Basal <- STI_graph_Basal + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Basal <- STI_graph_Basal + theme_classic()
STI_graph_Basal <- STI_graph_Basal + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Basal
# Center of Gravity
my_line <- median(all$CoG.STI)
STI_graph_CoG <- ggplot(all, aes(x = Identifier, y = CoG.STI, fill = class))
STI_graph_CoG <- STI_graph_CoG + geom_bar(stat = "identity")
STI_graph_CoG <- STI_graph_CoG + xlab("") + ylab("Fraction of Control (Center of Gravity)")
STI_graph_CoG <- STI_graph_CoG + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_CoG <- STI_graph_CoG + theme_classic()
STI_graph_CoG <- STI_graph_CoG + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_CoG
Let’s pull all these graphs into one figure too:
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.23_SIT_of_branching.pdf", width=16, height=23)
plot_grid(STI_graph_Apical, STI_graph_Branched, STI_graph_Basal, STI_graph_CoG,
ncol = 2, align = "hv", labels=c("AUTO"),
label_size = 14)
dev.off()
## quartz_off_screen
## 2
Global correlations - making a graph bigger than life ;)
colnames(all)
## [1] "Identifier" "MRL_C" "STR_C"
## [4] "noLR_C" "LRD_C" "LRL_C"
## [7] "aLRL_C" "DIR_C" "BasZ_C"
## [10] "BraZ_C" "ApZ_C" "aLRLpMRL_C"
## [13] "TRS_C" "BasZpMRL_C" "BraZpMRL_C"
## [16] "ApZpMRL_C" "MRL_S" "STR_S"
## [19] "noLR_S" "LRD_S" "LRL_S"
## [22] "aLRL_S" "DIR_S" "BasZ_S"
## [25] "BraZ_S" "ApZ_S" "aLRLpMRL_S"
## [28] "TRS_S" "BasZpMRL_S" "BraZpMRL_S"
## [31] "ApZpMRL_S" "MRL_STI" "LRL_STI"
## [34] "noLR_STI" "aLRL_STI" "TRS_STI"
## [37] "class" "Apical.C" "Branched.C"
## [40] "Basal.C" "Apical_perc.C" "Branched_perc.C"
## [43] "Basal_perc.C" "LR_no_10_100.C" "LR_no_20_100.C"
## [46] "LR_no_30_100.C" "LR_no_40_100.C" "LR_no_50_100.C"
## [49] "LR_no_60_100.C" "LR_no_70_100.C" "LR_no_80_100.C"
## [52] "LR_no_90_100.C" "LR_no_100_100.C" "LR_no_25_4.C"
## [55] "LR_no_50_4.C" "LR_no_75_4.C" "LR_no_100_4.C"
## [58] "LRL_10_100.C" "LRL_20_100.C" "LRL_30_100.C"
## [61] "LRL_40_100.C" "LRL_50_100.C" "LRL_60_100.C"
## [64] "LRL_70_100.C" "LRL_80_100.C" "LRL_90_100.C"
## [67] "LRL_100_100.C" "LRL_25_4.C" "LRL_50_4.C"
## [70] "LRL_75_4.C" "LRL_100_4.C" "CoG.C"
## [73] "CoG_perc.C" "Apical.S" "Branched.S"
## [76] "Basal.S" "Apical_perc.S" "Branched_perc.S"
## [79] "Basal_perc.S" "LR_no_10_100.S" "LR_no_20_100.S"
## [82] "LR_no_30_100.S" "LR_no_40_100.S" "LR_no_50_100.S"
## [85] "LR_no_60_100.S" "LR_no_70_100.S" "LR_no_80_100.S"
## [88] "LR_no_90_100.S" "LR_no_100_100.S" "LR_no_25_4.S"
## [91] "LR_no_50_4.S" "LR_no_75_4.S" "LR_no_100_4.S"
## [94] "LRL_10_100.S" "LRL_20_100.S" "LRL_30_100.S"
## [97] "LRL_40_100.S" "LRL_50_100.S" "LRL_60_100.S"
## [100] "LRL_70_100.S" "LRL_80_100.S" "LRL_90_100.S"
## [103] "LRL_100_100.S" "LRL_25_4.S" "LRL_50_4.S"
## [106] "LRL_75_4.S" "LRL_100_4.S" "CoG.S"
## [109] "CoG_perc.S" "Apical.STI" "Branched.STI"
## [112] "Basal.STI" "Apical_perc.STI" "Branched_perc.STI"
## [115] "Basal_perc.STI" "CoG.STI" "CoG_perc.STI"
all_m <- as.matrix(all[,c(2:36,38:117)])
corr_A <- cor(all_m)
res1_A <- cor.mtest(all_m)
diag(corr_A) = NA
diag(res1_A$p) = NA
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.24_Correlation_all.pdf", height = 20, width= 20)
corrplot(corr_A, type = "upper", p.mat = res1_A$p, insig = "label_sig",
sig.level = c(.001, .01, .05), pch.cex = .9, pch.col = "black",
na.label = "x", method = "color", tl.col = "black", tl.pos = "tp",
tl.cex = 0.7, col=brewer.pal(n=8, name="PuOr"), mar = c(1, 1, 1, 1))
dev.off()
## quartz_off_screen
## 2
Let’s have a look at the correlation between the most interesting traits based on our understanding and interest ;)
# Performance based on TRS with the CoG performance
inter_corr <- ggscatter(all, x= "TRS_STI", y="CoG.STI",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "TRS @ Salt / TRS @ Control", ylab = "CoG @ Salt / CoG @ Control")
inter_corr <- inter_corr + stat_cor() + rremove("legend")
inter_corr
## `geom_smooth()` using formula 'y ~ x'
# OK - maybe above is too far fetched - lets have a look with CoG @ Salt
inter_corr <- ggscatter(all, x= "TRS_STI", y="CoG.S",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "TRS @ Salt / TRS @ Control", ylab = "CoG @ Salt")
inter_corr <- inter_corr + stat_cor() + rremove("legend")
inter_corr
## `geom_smooth()` using formula 'y ~ x'
# OK - we are starting to getting somewhere - we have a slight indication that LOWER center of gravity is corresponding to larger root system at salt stress
inter_corr <- ggscatter(all, x= "TRS_S", y="CoG.S",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "TRS @ Salt", ylab = "CoG @ Salt")
inter_corr <- inter_corr + stat_cor() + rremove("legend")
inter_corr
## `geom_smooth()` using formula 'y ~ x'
# Do we then see an opposite relation with Apical zone? >> NOPE
inter_corr2 <- ggscatter(all, x= "TRS_S", y="Apical.S",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "TRS @ Salt (cm)", ylab = "Apical zone length @ Salt (cm)")
inter_corr2 <- inter_corr2 + stat_cor() + rremove("legend")
inter_corr2
## `geom_smooth()` using formula 'y ~ x'
# How about Branched zone? >> YES!
inter_corr2 <- ggscatter(all, x= "TRS_S", y="Branched.S",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "TRS @ Salt (cm)", ylab = "Branched zone length @ Salt (cm)")
inter_corr2 <- inter_corr2 + stat_cor() + rremove("legend")
inter_corr2
## `geom_smooth()` using formula 'y ~ x'
# How about Basal zone? >> YES!
inter_corr3 <- ggscatter(all, x= "TRS_S", y="Basal.S",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "TRS @ Salt (cm)", ylab = "Basal zone length @ Salt (cm)")
inter_corr3 <- inter_corr3 + stat_cor() + rremove("legend")
inter_corr3
## `geom_smooth()` using formula 'y ~ x'
# And how is the Center of Gravity correlated between the CoG @ Control and @ Salt
inter_corr4 <- ggscatter(all, x= "CoG.C", y="CoG.S",
add = "reg.line", conf.int = F, palette = "jco",
xlab = "CoG @ Control (cm MR)", ylab = "CoG @ Salt (cm MR)")
inter_corr4 <- inter_corr4 + stat_cor() + rremove("legend")
inter_corr4
## `geom_smooth()` using formula 'y ~ x'
Let’s combine all these graphs into figure too:
pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.25_Correlation_most interesting traits.pdf", height = 10, width= 10)
plot_grid(inter_corr, inter_corr2, inter_corr3, inter_corr4,
ncol = 2, align = "hv", labels=c("AUTO"),
label_size = 14)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
dev.off()
## quartz_off_screen
## 2
There was previously an issue with some genotypes that were not in the genotyping file. So let’s re-evaluate which Identifiers to include ;)
old_GWAS<- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/GWAS_Try1/Albert_Pheno.csv")
head(old_GWAS)
colnames(old_GWAS)[1] <- "Identifier"
what_you_want <- old_GWAS$Identifier
all$Identifier <- gsub(" ", "", all$Identifier)
all$Identifier %in% what_you_want
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE
dim(all)
## [1] 138 117
all_final <- subset(all, all$Identifier %in% what_you_want)
dim(all_final)
## [1] 135 117
colnames(all_final)
## [1] "Identifier" "MRL_C" "STR_C"
## [4] "noLR_C" "LRD_C" "LRL_C"
## [7] "aLRL_C" "DIR_C" "BasZ_C"
## [10] "BraZ_C" "ApZ_C" "aLRLpMRL_C"
## [13] "TRS_C" "BasZpMRL_C" "BraZpMRL_C"
## [16] "ApZpMRL_C" "MRL_S" "STR_S"
## [19] "noLR_S" "LRD_S" "LRL_S"
## [22] "aLRL_S" "DIR_S" "BasZ_S"
## [25] "BraZ_S" "ApZ_S" "aLRLpMRL_S"
## [28] "TRS_S" "BasZpMRL_S" "BraZpMRL_S"
## [31] "ApZpMRL_S" "MRL_STI" "LRL_STI"
## [34] "noLR_STI" "aLRL_STI" "TRS_STI"
## [37] "class" "Apical.C" "Branched.C"
## [40] "Basal.C" "Apical_perc.C" "Branched_perc.C"
## [43] "Basal_perc.C" "LR_no_10_100.C" "LR_no_20_100.C"
## [46] "LR_no_30_100.C" "LR_no_40_100.C" "LR_no_50_100.C"
## [49] "LR_no_60_100.C" "LR_no_70_100.C" "LR_no_80_100.C"
## [52] "LR_no_90_100.C" "LR_no_100_100.C" "LR_no_25_4.C"
## [55] "LR_no_50_4.C" "LR_no_75_4.C" "LR_no_100_4.C"
## [58] "LRL_10_100.C" "LRL_20_100.C" "LRL_30_100.C"
## [61] "LRL_40_100.C" "LRL_50_100.C" "LRL_60_100.C"
## [64] "LRL_70_100.C" "LRL_80_100.C" "LRL_90_100.C"
## [67] "LRL_100_100.C" "LRL_25_4.C" "LRL_50_4.C"
## [70] "LRL_75_4.C" "LRL_100_4.C" "CoG.C"
## [73] "CoG_perc.C" "Apical.S" "Branched.S"
## [76] "Basal.S" "Apical_perc.S" "Branched_perc.S"
## [79] "Basal_perc.S" "LR_no_10_100.S" "LR_no_20_100.S"
## [82] "LR_no_30_100.S" "LR_no_40_100.S" "LR_no_50_100.S"
## [85] "LR_no_60_100.S" "LR_no_70_100.S" "LR_no_80_100.S"
## [88] "LR_no_90_100.S" "LR_no_100_100.S" "LR_no_25_4.S"
## [91] "LR_no_50_4.S" "LR_no_75_4.S" "LR_no_100_4.S"
## [94] "LRL_10_100.S" "LRL_20_100.S" "LRL_30_100.S"
## [97] "LRL_40_100.S" "LRL_50_100.S" "LRL_60_100.S"
## [100] "LRL_70_100.S" "LRL_80_100.S" "LRL_90_100.S"
## [103] "LRL_100_100.S" "LRL_25_4.S" "LRL_50_4.S"
## [106] "LRL_75_4.S" "LRL_100_4.S" "CoG.S"
## [109] "CoG_perc.S" "Apical.STI" "Branched.STI"
## [112] "Basal.STI" "Apical_perc.STI" "Branched_perc.STI"
## [115] "Basal_perc.STI" "CoG.STI" "CoG_perc.STI"
Let’s add more traits to this final table and re-categorize class into four separate categories:
all_final$class_bad <- all_final$class == "bad"
all_final$class_bad <- as.integer(as.logical(all_final$class_bad))
all_final$class_superbad <- all_final$class == "super-bad"
all_final$class_superbad <- as.integer(as.logical(all_final$class_superbad))
all_final$class_good <- all_final$class == "good"
all_final$class_good <- as.integer(as.logical(all_final$class_good))
all_final$class_kickass <- all_final$class == "kick-ass"
all_final$class_kickass <- as.integer(as.logical(all_final$class_kickass))
colnames(all_final)
## [1] "Identifier" "MRL_C" "STR_C"
## [4] "noLR_C" "LRD_C" "LRL_C"
## [7] "aLRL_C" "DIR_C" "BasZ_C"
## [10] "BraZ_C" "ApZ_C" "aLRLpMRL_C"
## [13] "TRS_C" "BasZpMRL_C" "BraZpMRL_C"
## [16] "ApZpMRL_C" "MRL_S" "STR_S"
## [19] "noLR_S" "LRD_S" "LRL_S"
## [22] "aLRL_S" "DIR_S" "BasZ_S"
## [25] "BraZ_S" "ApZ_S" "aLRLpMRL_S"
## [28] "TRS_S" "BasZpMRL_S" "BraZpMRL_S"
## [31] "ApZpMRL_S" "MRL_STI" "LRL_STI"
## [34] "noLR_STI" "aLRL_STI" "TRS_STI"
## [37] "class" "Apical.C" "Branched.C"
## [40] "Basal.C" "Apical_perc.C" "Branched_perc.C"
## [43] "Basal_perc.C" "LR_no_10_100.C" "LR_no_20_100.C"
## [46] "LR_no_30_100.C" "LR_no_40_100.C" "LR_no_50_100.C"
## [49] "LR_no_60_100.C" "LR_no_70_100.C" "LR_no_80_100.C"
## [52] "LR_no_90_100.C" "LR_no_100_100.C" "LR_no_25_4.C"
## [55] "LR_no_50_4.C" "LR_no_75_4.C" "LR_no_100_4.C"
## [58] "LRL_10_100.C" "LRL_20_100.C" "LRL_30_100.C"
## [61] "LRL_40_100.C" "LRL_50_100.C" "LRL_60_100.C"
## [64] "LRL_70_100.C" "LRL_80_100.C" "LRL_90_100.C"
## [67] "LRL_100_100.C" "LRL_25_4.C" "LRL_50_4.C"
## [70] "LRL_75_4.C" "LRL_100_4.C" "CoG.C"
## [73] "CoG_perc.C" "Apical.S" "Branched.S"
## [76] "Basal.S" "Apical_perc.S" "Branched_perc.S"
## [79] "Basal_perc.S" "LR_no_10_100.S" "LR_no_20_100.S"
## [82] "LR_no_30_100.S" "LR_no_40_100.S" "LR_no_50_100.S"
## [85] "LR_no_60_100.S" "LR_no_70_100.S" "LR_no_80_100.S"
## [88] "LR_no_90_100.S" "LR_no_100_100.S" "LR_no_25_4.S"
## [91] "LR_no_50_4.S" "LR_no_75_4.S" "LR_no_100_4.S"
## [94] "LRL_10_100.S" "LRL_20_100.S" "LRL_30_100.S"
## [97] "LRL_40_100.S" "LRL_50_100.S" "LRL_60_100.S"
## [100] "LRL_70_100.S" "LRL_80_100.S" "LRL_90_100.S"
## [103] "LRL_100_100.S" "LRL_25_4.S" "LRL_50_4.S"
## [106] "LRL_75_4.S" "LRL_100_4.S" "CoG.S"
## [109] "CoG_perc.S" "Apical.STI" "Branched.STI"
## [112] "Basal.STI" "Apical_perc.STI" "Branched_perc.STI"
## [115] "Basal_perc.STI" "CoG.STI" "CoG_perc.STI"
## [118] "class_bad" "class_superbad" "class_good"
## [121] "class_kickass"
remove all unneccessary columns:
for_GWAS <- all_final[,c(1:36,38:121)]
then - add some more columns, because there is always more traits I can come up with :)
for_GWAS$LR_no_fract_10_100.C <- for_GWAS$LR_no_10_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_20_100.C <- for_GWAS$LR_no_20_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_30_100.C <- for_GWAS$LR_no_30_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_40_100.C <- for_GWAS$LR_no_40_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_50_100.C <- for_GWAS$LR_no_50_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_60_100.C <- for_GWAS$LR_no_60_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_70_100.C <- for_GWAS$LR_no_70_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_80_100.C <- for_GWAS$LR_no_80_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_90_100.C <- for_GWAS$LR_no_90_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_100_100.C <- for_GWAS$LR_no_100_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_10_100.S <- for_GWAS$LR_no_10_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_20_100.S <- for_GWAS$LR_no_20_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_30_100.S <- for_GWAS$LR_no_30_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_40_100.S <- for_GWAS$LR_no_40_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_50_100.S <- for_GWAS$LR_no_50_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_60_100.S <- for_GWAS$LR_no_60_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_70_100.S <- for_GWAS$LR_no_70_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_80_100.S <- for_GWAS$LR_no_80_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_90_100.S <- for_GWAS$LR_no_90_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_100_100.S <- for_GWAS$LR_no_100_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_25_4.C <- for_GWAS$LR_no_25_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_50_4.C <- for_GWAS$LR_no_50_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_75_4.C <- for_GWAS$LR_no_75_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_100_4.C <- for_GWAS$LR_no_100_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_25_4.S <- for_GWAS$LR_no_25_4.S / for_GWAS$noLR_C
for_GWAS$LR_no_fract_50_4.S <- for_GWAS$LR_no_50_4.S / for_GWAS$noLR_C
for_GWAS$LR_no_fract_75_4.S <- for_GWAS$LR_no_75_4.S / for_GWAS$noLR_C
for_GWAS$LR_no_fract_100_4.S <- for_GWAS$LR_no_100_4.S / for_GWAS$noLR_C
# LRL same
for_GWAS$LRL_fract_10_100.C <- for_GWAS$LRL_10_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_20_100.C <- for_GWAS$LRL_20_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_30_100.C <- for_GWAS$LRL_30_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_40_100.C <- for_GWAS$LRL_40_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_50_100.C <- for_GWAS$LRL_50_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_60_100.C <- for_GWAS$LRL_60_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_70_100.C <- for_GWAS$LRL_70_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_80_100.C <- for_GWAS$LRL_80_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_90_100.C <- for_GWAS$LRL_90_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_100_100.C <- for_GWAS$LRL_100_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_10_100.S <- for_GWAS$LRL_10_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_20_100.S <- for_GWAS$LRL_20_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_30_100.S <- for_GWAS$LRL_30_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_40_100.S <- for_GWAS$LRL_40_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_50_100.S <- for_GWAS$LRL_50_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_60_100.S <- for_GWAS$LRL_60_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_70_100.S <- for_GWAS$LRL_70_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_80_100.S <- for_GWAS$LRL_80_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_90_100.S <- for_GWAS$LRL_90_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_100_100.S <- for_GWAS$LRL_100_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_25_4.C <- for_GWAS$LRL_25_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_50_4.C <- for_GWAS$LRL_50_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_75_4.C <- for_GWAS$LRL_75_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_100_4.C <- for_GWAS$LRL_100_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_25_4.S <- for_GWAS$LRL_25_4.S / for_GWAS$LRL_C
for_GWAS$LRL_fract_50_4.S <- for_GWAS$LRL_50_4.S / for_GWAS$LRL_C
for_GWAS$LRL_fract_75_4.S <- for_GWAS$LRL_75_4.S / for_GWAS$LRL_C
for_GWAS$LRL_fract_100_4.S <- for_GWAS$LRL_100_4.S / for_GWAS$LRL_C
OK - NOW we aree ultimately DONE :)))
Let’s save it, but we need to have ecot_ID as numbers only
Lets see how it worked last time with files from Arabidopsis script:
decoding_1 <- read.table("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Arabidopsis script GWAS/data files/df.traitgwa.txt", header=T)
decoding_2 <- read.table("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Arabidopsis script GWAS/data files/df.traitgwa.better.txt", header=T)
head(decoding_1)
head(decoding_2)
decoding_1 <- decoding_1$genotype
decoding_2 <- decoding_2$genotype
decoding_2
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135
for_GWAS$ecot_ID <- decoding_2
colnames(for_GWAS)
## [1] "Identifier" "MRL_C" "STR_C"
## [4] "noLR_C" "LRD_C" "LRL_C"
## [7] "aLRL_C" "DIR_C" "BasZ_C"
## [10] "BraZ_C" "ApZ_C" "aLRLpMRL_C"
## [13] "TRS_C" "BasZpMRL_C" "BraZpMRL_C"
## [16] "ApZpMRL_C" "MRL_S" "STR_S"
## [19] "noLR_S" "LRD_S" "LRL_S"
## [22] "aLRL_S" "DIR_S" "BasZ_S"
## [25] "BraZ_S" "ApZ_S" "aLRLpMRL_S"
## [28] "TRS_S" "BasZpMRL_S" "BraZpMRL_S"
## [31] "ApZpMRL_S" "MRL_STI" "LRL_STI"
## [34] "noLR_STI" "aLRL_STI" "TRS_STI"
## [37] "Apical.C" "Branched.C" "Basal.C"
## [40] "Apical_perc.C" "Branched_perc.C" "Basal_perc.C"
## [43] "LR_no_10_100.C" "LR_no_20_100.C" "LR_no_30_100.C"
## [46] "LR_no_40_100.C" "LR_no_50_100.C" "LR_no_60_100.C"
## [49] "LR_no_70_100.C" "LR_no_80_100.C" "LR_no_90_100.C"
## [52] "LR_no_100_100.C" "LR_no_25_4.C" "LR_no_50_4.C"
## [55] "LR_no_75_4.C" "LR_no_100_4.C" "LRL_10_100.C"
## [58] "LRL_20_100.C" "LRL_30_100.C" "LRL_40_100.C"
## [61] "LRL_50_100.C" "LRL_60_100.C" "LRL_70_100.C"
## [64] "LRL_80_100.C" "LRL_90_100.C" "LRL_100_100.C"
## [67] "LRL_25_4.C" "LRL_50_4.C" "LRL_75_4.C"
## [70] "LRL_100_4.C" "CoG.C" "CoG_perc.C"
## [73] "Apical.S" "Branched.S" "Basal.S"
## [76] "Apical_perc.S" "Branched_perc.S" "Basal_perc.S"
## [79] "LR_no_10_100.S" "LR_no_20_100.S" "LR_no_30_100.S"
## [82] "LR_no_40_100.S" "LR_no_50_100.S" "LR_no_60_100.S"
## [85] "LR_no_70_100.S" "LR_no_80_100.S" "LR_no_90_100.S"
## [88] "LR_no_100_100.S" "LR_no_25_4.S" "LR_no_50_4.S"
## [91] "LR_no_75_4.S" "LR_no_100_4.S" "LRL_10_100.S"
## [94] "LRL_20_100.S" "LRL_30_100.S" "LRL_40_100.S"
## [97] "LRL_50_100.S" "LRL_60_100.S" "LRL_70_100.S"
## [100] "LRL_80_100.S" "LRL_90_100.S" "LRL_100_100.S"
## [103] "LRL_25_4.S" "LRL_50_4.S" "LRL_75_4.S"
## [106] "LRL_100_4.S" "CoG.S" "CoG_perc.S"
## [109] "Apical.STI" "Branched.STI" "Basal.STI"
## [112] "Apical_perc.STI" "Branched_perc.STI" "Basal_perc.STI"
## [115] "CoG.STI" "CoG_perc.STI" "class_bad"
## [118] "class_superbad" "class_good" "class_kickass"
## [121] "LR_no_fract_10_100.C" "LR_no_fract_20_100.C" "LR_no_fract_30_100.C"
## [124] "LR_no_fract_40_100.C" "LR_no_fract_50_100.C" "LR_no_fract_60_100.C"
## [127] "LR_no_fract_70_100.C" "LR_no_fract_80_100.C" "LR_no_fract_90_100.C"
## [130] "LR_no_fract_100_100.C" "LR_no_fract_10_100.S" "LR_no_fract_20_100.S"
## [133] "LR_no_fract_30_100.S" "LR_no_fract_40_100.S" "LR_no_fract_50_100.S"
## [136] "LR_no_fract_60_100.S" "LR_no_fract_70_100.S" "LR_no_fract_80_100.S"
## [139] "LR_no_fract_90_100.S" "LR_no_fract_100_100.S" "LR_no_fract_25_4.C"
## [142] "LR_no_fract_50_4.C" "LR_no_fract_75_4.C" "LR_no_fract_100_4.C"
## [145] "LR_no_fract_25_4.S" "LR_no_fract_50_4.S" "LR_no_fract_75_4.S"
## [148] "LR_no_fract_100_4.S" "LRL_fract_10_100.C" "LRL_fract_20_100.C"
## [151] "LRL_fract_30_100.C" "LRL_fract_40_100.C" "LRL_fract_50_100.C"
## [154] "LRL_fract_60_100.C" "LRL_fract_70_100.C" "LRL_fract_80_100.C"
## [157] "LRL_fract_90_100.C" "LRL_fract_100_100.C" "LRL_fract_10_100.S"
## [160] "LRL_fract_20_100.S" "LRL_fract_30_100.S" "LRL_fract_40_100.S"
## [163] "LRL_fract_50_100.S" "LRL_fract_60_100.S" "LRL_fract_70_100.S"
## [166] "LRL_fract_80_100.S" "LRL_fract_90_100.S" "LRL_fract_100_100.S"
## [169] "LRL_fract_25_4.C" "LRL_fract_50_4.C" "LRL_fract_75_4.C"
## [172] "LRL_fract_100_4.C" "LRL_fract_25_4.S" "LRL_fract_50_4.S"
## [175] "LRL_fract_75_4.S" "LRL_fract_100_4.S" "ecot_ID"
for_GWAS2 <- for_GWAS[,c(177, 1:176)]
for_GWAS_only <- for_GWAS[,c(177,2:176)]
write.csv(for_GWAS2, "ultimate_GWAS_Safi.csv", row.names = F)
write.csv(for_GWAS_only, "ultimate_GWAS_only_Safi.csv", row.names = F)