In this notebook I am going to examine the lateral root distribution across a collection of tomato accessions which were collected by Safarina Ahmad.

The aims are to: 1) examine the lateral root distribution across the main root 2) visualize the shift in apical zone (getting extended under salt stress) that Safarina observed in the pictures 3) calculate the “lateral root center of gravity” - indicating the portion of Main Root at which majority of Lateral Root number / Length standard calculation from center of gravity (physics) is: calculate momentum for each object (dist. * weight) center of gravity = sum(momentum) / sum(distance)

Importing data

getwd()

## [1] "/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis"

setwd("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/")
list.files()

##  [1] "20200131_Branching_analysis_for_Safi_files"  
##  [2] "20200131_Branching_analysis_for_Safi.nb.html"
##  [3] "20200131_Branching_analysis_for_Safi.Rmd"    
##  [4] "AllMarks_DataInSegment"                      
##  [5] "DataInSegment_04122019.R"                    
##  [6] "G1_010_AllMarks.csv"                         
##  [7] "G1_010_GlobalRootData.csv"                   
##  [8] "G1_setup.csv"                                
##  [9] "G2_010_AllMarks.csv"                         
## [10] "G2_010_GlobalRootData.csv"                   
## [11] "G2_setup.csv"                                
## [12] "G3_010_AllMarks.csv"                         
## [13] "G3_010_GlobalRootData.csv"                   
## [14] "G3_setup.csv"                                
## [15] "G4_010_AllMarks.csv"                         
## [16] "G4_010_GlobalRootData.csv"                   
## [17] "G4_setup.csv"                                
## [18] "G5_010_AllMarks.csv"                         
## [19] "G5_010_GlobalRootData.csv"                   
## [20] "G5_setup.csv"                                
## [21] "G6_010_AllMarks.csv"                         
## [22] "G6_010_GlobalRootData.csv"                   
## [23] "G6_setup.csv"                                
## [24] "G7_010_AllMarks.csv"                         
## [25] "G7_010_GlobalRootData.csv"                   
## [26] "G7_setup.csv"                                
## [27] "G8_010_AllMarks.csv"                         
## [28] "G8_010_GlobalRootData.csv"                   
## [29] "G8_setup.csv"                                
## [30] "GWAS_ready.csv"                              
## [31] "LR_distribution_final_set.csv"               
## [32] "ultimate_GWAS_only_Safi.csv"                 
## [33] "ultimate_GWAS_Safi.csv"

libraries used in this script:

library(reshape)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following objects are masked from 'package:reshape':
## 
##     colsplit, melt, recast

library(ggplot2)
library(ggpubr)

## Loading required package: magrittr

library(doBy)
library(cowplot)

## 
## ********************************************************

## Note: As of version 1.0.0, cowplot does not change the

##   default ggplot2 theme anymore. To recover the previous

##   behavior, execute:
##   theme_set(theme_cowplot())

## ********************************************************

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggpubr':
## 
##     get_legend

## The following object is masked from 'package:reshape':
## 
##     stamp

library(RColorBrewer)
library(corrplot)

## corrplot 0.84 loaded

the files that contain the position of each LR are “GlobalRootData.csv” - so we need to import them and then fuse them all into one big file

my_files <- list.files(pattern="GlobalRootData.csv")
my_files

## [1] "G1_010_GlobalRootData.csv" "G2_010_GlobalRootData.csv"
## [3] "G3_010_GlobalRootData.csv" "G4_010_GlobalRootData.csv"
## [5] "G5_010_GlobalRootData.csv" "G6_010_GlobalRootData.csv"
## [7] "G7_010_GlobalRootData.csv" "G8_010_GlobalRootData.csv"

length(my_files)

## [1] 8

Let’s do one file as an example - and add experiment number to the file - so we will be able to sort out the roots later:

temp <- read.csv(my_files[1])
head(temp)

exp <- gsub("_010_GlobalRootData.csv", "", my_files[1])
exp

## [1] "G1"

temp$experiment <- exp

final <- temp

Looks good - so let’s loop it and combine it into final file:

dim(final)

## [1] 2008   22

for(i in 2:8){
  temp <- read.csv(my_files[i])
  exp <- gsub("_010_GlobalRootData.csv", "", my_files[i])
  temp$experiment <- exp
  final <- rbind(final, temp)
}

dim(final)

## [1] 14736    22

Calculating the desired traits

we are only going to calculate the traits for roots that are actually having LR - therefore - we need to subset out final dataset for n_child > 0

head(final)

so in our data - we have a structure where
1) each root has an unique ID - and all the lateral roots belonging to one MR list that ID as parent 2) all the main roots have “Root” in the root_ontology collumn 3) insertion_position is a position on MR where individual LR is inserted 4) length of individual root is in collumn “length”

So maybe first let’s just clean up the table to the collumns we really need for this calculations - as we are only interested in length and identifiers listed above - we are trimming the data table off the other info:

colnames(final)

##  [1] "image"                 "root_name"             "root"                 
##  [4] "length"                "vector_length"         "surface"              
##  [7] "volume"                "direction"             "diameter"             
## [10] "root_order"            "root_ontology"         "parent_name"          
## [13] "parent"                "insertion_position"    "insertion_angle"      
## [16] "n_child"               "child_density"         "first_child"          
## [19] "insertion_first_child" "last_child"            "insertion_last_child" 
## [22] "experiment"

final2 <- final[,c(22, 1, 3:4, 11, 16, 13:14)]
head(final2)

Then we create a file containing only the main root information:

unique(final2$root_ontology)[1]

## [1]  Root
## Levels:  Lateral root  Root

only_MR <- subset(final2, final2$root_ontology == unique(final2$root_ontology)[1])
head(only_MR)

Plus - we would like to have the MR that only have LR

no_LR_MR <- subset(only_MR, only_MR$n_child < 1)
only_MR <- subset(only_MR, only_MR$n_child > 0)
head(only_MR)

Let’s cut all of the unneccessary info here too:

only_MR <- only_MR[,c(1:4)]
dim(only_MR)

## [1] 1254    4

all_MR <- only_MR$root
length(all_MR)

## [1] 1254

So we have in total 1254 unique Main Roots - we will have to examine the LR belonging to each by extracting them based on the “parent” collumn:

MR_now <- all_MR[1]
MR_now

## [1]  e0ff9ea5-6a63-42ed-be68-d1f6994c47b3
## 14736 Levels:  001813c8-3651-4583-8fd8-8ad0811e0ed2 ...

head(final2)

super_temp <- subset(final2, final2$parent %in% MR_now)
super_temp

Then - we need to create some info about the length of individual zones - for this we need info on MR_length

MR_length <- subset(only_MR, only_MR$root %in% MR_now)
MR_length <- MR_length$length
MR_length

## [1] 11.44772

Length of Apical zone is basically equal to position of first lateral root Branched zone length is the position of last LR - position of first LR Basal zone is the MR length - position of last LR thus:

Apical <- min(super_temp$insertion_position)
Branched <- (max(super_temp$insertion_position) - Apical)
Basal <- MR_length - max(super_temp$insertion_position)

Apical

## [1] 1.61211

Branched

## [1] 5.770811

Basal

## [1] 4.064801

Then - we can calculate the % of LR across the MR length - by dividing the MR into 4 or 10 parts

super_temp$LR_in_10_perc <- 0
super_temp$LR_in_20_perc <- 0
super_temp$LR_in_30_perc <- 0
super_temp$LR_in_40_perc <- 0
super_temp$LR_in_50_perc <- 0
super_temp$LR_in_60_perc <- 0
super_temp$LR_in_70_perc <- 0
super_temp$LR_in_80_perc <- 0
super_temp$LR_in_90_perc <- 0
super_temp$LR_in_100_perc <- 0
super_temp$LR_in_25_perc_4 <- 0
super_temp$LR_in_50_perc_4 <- 0
super_temp$LR_in_75_perc_4 <- 0
super_temp$LR_in_100_perc_4 <- 0

for(e in 1:nrow(super_temp)){
   if (super_temp$insertion_position[e] < (MR_length/10)){
    super_temp$LR_in_10_perc[e] <- 1
   } else {super_temp$LR_in_10_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 2*(MR_length/10) & super_temp$insertion_position[e] > (MR_length/10)){
    super_temp$LR_in_20_perc[e] <- 1
  } else {super_temp$LR_in_20_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 3*(MR_length/10) & super_temp$insertion_position[e] > 2*(MR_length/10)){
    super_temp$LR_in_30_perc[e] <- 1
  } else {super_temp$LR_in_30_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 4*(MR_length/10) & super_temp$insertion_position[e] > 3*(MR_length/10)){
    super_temp$LR_in_40_perc[e] <- 1
  } else {super_temp$LR_in_40_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 5*(MR_length/10) & super_temp$insertion_position[e] > 4*(MR_length/10)){
    super_temp$LR_in_50_perc[e] <- 1
  } else {super_temp$LR_in_50_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 6*(MR_length/10) & super_temp$insertion_position[e] > 5*(MR_length/10)){
    super_temp$LR_in_60_perc[e] <- 1
  } else {super_temp$LR_in_60_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 7*(MR_length/10) & super_temp$insertion_position[e] > 6*(MR_length/10)){
    super_temp$LR_in_70_perc[e] <- 1
  } else {super_temp$LR_in_70_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 8*(MR_length/10) & super_temp$insertion_position[e] > 7*(MR_length/10)){
    super_temp$LR_in_80_perc[e] <- 1
  } else {super_temp$LR_in_80_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 9*(MR_length/10) & super_temp$insertion_position[e] > 8*(MR_length/10)){
    super_temp$LR_in_90_perc[e] <- 1
  } else {super_temp$LR_in_90_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 10*(MR_length/10) & super_temp$insertion_position[e] > 9*(MR_length/10)){
    super_temp$LR_in_100_perc[e] <- 1
  } else {super_temp$LR_in_100_perc[e] <- 0}
  if (super_temp$insertion_position[e] < 1*(MR_length/4)){
    super_temp$LR_in_25_perc_4[e] <- 1
  } else {super_temp$LR_in_25_perc_4[e] <- 0}
  if (super_temp$insertion_position[e] < 2*(MR_length/4) & super_temp$insertion_position[e] > 1*(MR_length/4)){
    super_temp$LR_in_50_perc_4[e] <- 1
  } else {super_temp$LR_in_50_perc_4[e] <- 0}
  if (super_temp$insertion_position[e] < 3*(MR_length/4) & super_temp$insertion_position[e] > 2*(MR_length/4)){
    super_temp$LR_in_75_perc_4[e] <- 1
  } else {super_temp$LR_in_75_perc_4[e] <- 0}
  if (super_temp$insertion_position[e] < 4*(MR_length/4) & super_temp$insertion_position[e] > 3*(MR_length/4)){
    super_temp$LR_in_100_perc_4[e] <- 1
  } else {super_temp$LR_in_100_perc_4[e] <- 0}
} 

super_temp

so after classifying each LR to its respective portion of MR, we can then summarize them and calculate total LR length in this fragment:

super_temp$LRL_in_10_perc <- super_temp$length * super_temp$LR_in_10_perc
super_temp$LRL_in_20_perc <- super_temp$length * super_temp$LR_in_20_perc
super_temp$LRL_in_30_perc <- super_temp$length * super_temp$LR_in_30_perc
super_temp$LRL_in_40_perc <- super_temp$length * super_temp$LR_in_40_perc
super_temp$LRL_in_50_perc <- super_temp$length * super_temp$LR_in_50_perc
super_temp$LRL_in_60_perc <- super_temp$length * super_temp$LR_in_60_perc
super_temp$LRL_in_70_perc <- super_temp$length * super_temp$LR_in_70_perc
super_temp$LRL_in_80_perc <- super_temp$length * super_temp$LR_in_80_perc
super_temp$LRL_in_90_perc <- super_temp$length * super_temp$LR_in_90_perc
super_temp$LRL_in_100_perc <- super_temp$length * super_temp$LR_in_100_perc

super_temp$LRL_in_25_perc_4 <- super_temp$length * super_temp$LR_in_25_perc_4
super_temp$LRL_in_50_perc_4 <- super_temp$length * super_temp$LR_in_50_perc_4
super_temp$LRL_in_75_perc_4 <- super_temp$length * super_temp$LR_in_75_perc_4
super_temp$LRL_in_100_perc_4 <- super_temp$length * super_temp$LR_in_100_perc_4

LR_no_10_100 <- sum(super_temp$LR_in_10_perc)
LR_no_20_100 <- sum(super_temp$LR_in_20_perc)
LR_no_30_100 <- sum(super_temp$LR_in_30_perc)
LR_no_40_100 <- sum(super_temp$LR_in_40_perc)
LR_no_50_100 <- sum(super_temp$LR_in_50_perc)
LR_no_60_100 <- sum(super_temp$LR_in_60_perc)
LR_no_70_100 <- sum(super_temp$LR_in_70_perc)
LR_no_80_100 <- sum(super_temp$LR_in_80_perc)
LR_no_90_100 <- sum(super_temp$LR_in_90_perc)
LR_no_100_100 <- sum(super_temp$LR_in_100_perc)

LR_no_25_4 <- sum(super_temp$LR_in_25_perc_4)
LR_no_50_4 <- sum(super_temp$LR_in_50_perc_4)
LR_no_75_4 <- sum(super_temp$LR_in_75_perc_4)
LR_no_100_4 <- sum(super_temp$LR_in_100_perc_4)

LRL_10_100 <- sum(super_temp$LRL_in_10_perc)
LRL_20_100 <- sum(super_temp$LRL_in_20_perc)
LRL_30_100 <- sum(super_temp$LRL_in_30_perc)
LRL_40_100 <- sum(super_temp$LRL_in_40_perc)
LRL_50_100 <- sum(super_temp$LRL_in_50_perc)
LRL_60_100 <- sum(super_temp$LRL_in_60_perc)
LRL_70_100 <- sum(super_temp$LRL_in_70_perc)
LRL_80_100 <- sum(super_temp$LRL_in_80_perc)
LRL_90_100 <- sum(super_temp$LRL_in_90_perc)
LRL_100_100 <- sum(super_temp$LRL_in_100_perc)

LRL_25_4 <- sum(super_temp$LRL_in_25_perc_4)
LRL_50_4 <- sum(super_temp$LRL_in_50_perc_4)
LRL_75_4 <- sum(super_temp$LRL_in_75_perc_4)
LRL_100_4 <- sum(super_temp$LRL_in_100_perc_4)

OK - last but not least - we should calculate the center of gravity for LR:

super_temp$momentum <- (super_temp$insertion_position * super_temp$length)
super_temp

all_momentum <- sum(super_temp$momentum)
all_momentum

## [1] 61.61884

all_length <- sum(super_temp$length)
all_length

## [1] 18.08004

CoG <- (all_momentum / all_length)
CoG

## [1] 3.408113

Finally - let’s add all these values into our MR_only table

First we need to create needed collumns:

only_MR$Apical <- 0
only_MR$Branched <- 0
only_MR$Basal <- 0

only_MR$Apical_perc <- 0
only_MR$Branched_perc <- 0
only_MR$Basal_perc <- 0
only_MR$LR_no_10_100 <- 0
only_MR$LR_no_20_100 <- 0
only_MR$LR_no_30_100 <- 0
only_MR$LR_no_40_100 <- 0
only_MR$LR_no_50_100 <- 0
only_MR$LR_no_60_100 <- 0
only_MR$LR_no_70_100 <- 0
only_MR$LR_no_80_100 <- 0
only_MR$LR_no_90_100 <- 0
only_MR$LR_no_100_100 <- 0

only_MR$LR_no_25_4 <- 0
only_MR$LR_no_50_4 <- 0
only_MR$LR_no_75_4 <- 0
only_MR$LR_no_100_4 <- 0

only_MR$LRL_10_100 <- 0
only_MR$LRL_20_100 <- 0
only_MR$LRL_30_100 <- 0
only_MR$LRL_40_100 <- 0
only_MR$LRL_50_100 <- 0
only_MR$LRL_60_100 <- 0
only_MR$LRL_70_100 <- 0
only_MR$LRL_80_100 <- 0
only_MR$LRL_90_100 <- 0
only_MR$LRL_100_100 <- 0

only_MR$LRL_25_4 <- 0
only_MR$LRL_50_4 <- 0
only_MR$LRL_75_4 <- 0
only_MR$LRL_100_4 <- 0

only_MR$CoG <- 0

then add values to these collumns:

only_MR$Apical[1] <- Apical
only_MR$Branched[1] <- Branched
only_MR$Basal[1] <- Basal

only_MR$Apical_perc[1] <- (Apical / MR_length)
only_MR$Branched_perc[1] <- (Branched / MR_length)
only_MR$Basal_perc[1] <- (Basal / MR_length)
only_MR$LR_no_10_100[1] <- LR_no_10_100
only_MR$LR_no_20_100[1] <- LR_no_20_100
only_MR$LR_no_30_100[1] <- LR_no_30_100
only_MR$LR_no_40_100[1] <- LR_no_40_100
only_MR$LR_no_50_100[1] <- LR_no_50_100
only_MR$LR_no_60_100[1] <- LR_no_60_100
only_MR$LR_no_70_100[1] <- LR_no_70_100
only_MR$LR_no_80_100[1] <- LR_no_80_100
only_MR$LR_no_90_100[1] <- LR_no_90_100
only_MR$LR_no_100_100[1] <- LR_no_100_100

only_MR$LR_no_25_4[1] <- LR_no_25_4
only_MR$LR_no_50_4[1] <- LR_no_50_4
only_MR$LR_no_75_4[1] <- LR_no_75_4
only_MR$LR_no_100_4[1] <- LR_no_100_4

only_MR$LRL_10_100[1] <- LRL_10_100
only_MR$LRL_20_100[1] <- LRL_20_100
only_MR$LRL_30_100[1] <- LRL_30_100
only_MR$LRL_40_100[1] <- LRL_40_100
only_MR$LRL_50_100[1] <- LRL_50_100
only_MR$LRL_60_100[1] <- LRL_60_100
only_MR$LRL_70_100[1] <- LRL_70_100
only_MR$LRL_80_100[1] <- LRL_80_100
only_MR$LRL_90_100[1] <- LRL_90_100
only_MR$LRL_100_100[1] <- LRL_100_100

only_MR$LRL_25_4[1] <- LRL_25_4
only_MR$LRL_50_4[1] <- LRL_50_4
only_MR$LRL_75_4[1] <- LRL_75_4
only_MR$LRL_100_4[1] <- LRL_100_4

only_MR$CoG[1] <- CoG

let’s have a look if everything was correctly inserted:

head(only_MR)

Loop everything for the remaining roots - I already forgot how many we have ;)

length(unique(only_MR$root))

## [1] 1254

Loopy loop:

for(i in 1:1254){
  MR_now <- all_MR[i]
  super_temp <- subset(final2, final2$parent %in% MR_now)
  MR_length <- subset(only_MR, only_MR$root %in% MR_now)
  MR_length <- MR_length$length
  Apical <- min(super_temp$insertion_position)
  Branched <- max(super_temp$insertion_position) - Apical
  Basal <- MR_length - max(super_temp$insertion_position)
  only_MR$Apical[i] <- Apical
  only_MR$Branched[i] <- Branched
  only_MR$Basal[i] <- Basal
  
  only_MR$Apical_perc[i] <- (Apical / MR_length)
  only_MR$Branched_perc[i] <- (Branched / MR_length)
  only_MR$Basal_perc[i] <- (Basal / MR_length)

  super_temp$LR_in_10_perc <- 0
  super_temp$LR_in_20_perc <- 0
  super_temp$LR_in_30_perc <- 0
  super_temp$LR_in_40_perc <- 0
  super_temp$LR_in_50_perc <- 0
  super_temp$LR_in_60_perc <- 0
  super_temp$LR_in_70_perc <- 0
  super_temp$LR_in_80_perc <- 0
  super_temp$LR_in_90_perc <- 0
  super_temp$LR_in_100_perc <- 0
  super_temp$LR_in_25_perc_4 <- 0
  super_temp$LR_in_50_perc_4 <- 0
  super_temp$LR_in_75_perc_4 <- 0
  super_temp$LR_in_100_perc_4 <- 0
  
  for(e in 1:nrow(super_temp)){
     if (super_temp$insertion_position[e] < (MR_length/10)){
      super_temp$LR_in_10_perc[e] <- 1
     } else {super_temp$LR_in_10_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 2*(MR_length/10) & super_temp$insertion_position[e] > (MR_length/10)){
      super_temp$LR_in_20_perc[e] <- 1
    } else {super_temp$LR_in_20_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 3*(MR_length/10) & super_temp$insertion_position[e] > 2*(MR_length/10)){
      super_temp$LR_in_30_perc[e] <- 1
    } else {super_temp$LR_in_30_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 4*(MR_length/10) & super_temp$insertion_position[e] > 3*(MR_length/10)){
      super_temp$LR_in_40_perc[e] <- 1
    } else {super_temp$LR_in_40_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 5*(MR_length/10) & super_temp$insertion_position[e] > 4*(MR_length/10)){
      super_temp$LR_in_50_perc[e] <- 1
    } else {super_temp$LR_in_50_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 6*(MR_length/10) & super_temp$insertion_position[e] > 5*(MR_length/10)){
      super_temp$LR_in_60_perc[e] <- 1
    } else {super_temp$LR_in_60_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 7*(MR_length/10) & super_temp$insertion_position[e] > 6*(MR_length/10)){
      super_temp$LR_in_70_perc[e] <- 1
    } else {super_temp$LR_in_70_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 8*(MR_length/10) & super_temp$insertion_position[e] > 7*(MR_length/10)){
      super_temp$LR_in_80_perc[e] <- 1
    } else {super_temp$LR_in_80_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 9*(MR_length/10) & super_temp$insertion_position[e] > 8*(MR_length/10)){
      super_temp$LR_in_90_perc[e] <- 1
    } else {super_temp$LR_in_90_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 10*(MR_length/10) & super_temp$insertion_position[e] > 9*(MR_length/10)){
      super_temp$LR_in_100_perc[e] <- 1
    } else {super_temp$LR_in_100_perc[e] <- 0}
    if (super_temp$insertion_position[e] < 1*(MR_length/4)){
      super_temp$LR_in_25_perc_4[e] <- 1
    } else {super_temp$LR_in_25_perc_4[e] <- 0}
    if (super_temp$insertion_position[e] < 2*(MR_length/4) & super_temp$insertion_position[e] > 1*(MR_length/4)){
      super_temp$LR_in_50_perc_4[e] <- 1
    } else {super_temp$LR_in_50_perc_4[e] <- 0}
    if (super_temp$insertion_position[e] < 3*(MR_length/4) & super_temp$insertion_position[e] > 2*(MR_length/4)){
      super_temp$LR_in_75_perc_4[e] <- 1
    } else {super_temp$LR_in_75_perc_4[e] <- 0}
    if (super_temp$insertion_position[e] < 4*(MR_length/4) & super_temp$insertion_position[e] > 3*(MR_length/4)){
      super_temp$LR_in_100_perc_4[e] <- 1
    } else {super_temp$LR_in_100_perc_4[e] <- 0}
  } 
  
  super_temp$LRL_in_10_perc <- super_temp$length * super_temp$LR_in_10_perc
  super_temp$LRL_in_20_perc <- super_temp$length * super_temp$LR_in_20_perc
  super_temp$LRL_in_30_perc <- super_temp$length * super_temp$LR_in_30_perc
  super_temp$LRL_in_40_perc <- super_temp$length * super_temp$LR_in_40_perc
  super_temp$LRL_in_50_perc <- super_temp$length * super_temp$LR_in_50_perc
  super_temp$LRL_in_60_perc <- super_temp$length * super_temp$LR_in_60_perc
  super_temp$LRL_in_70_perc <- super_temp$length * super_temp$LR_in_70_perc
  super_temp$LRL_in_80_perc <- super_temp$length * super_temp$LR_in_80_perc
  super_temp$LRL_in_90_perc <- super_temp$length * super_temp$LR_in_90_perc
  super_temp$LRL_in_100_perc <- super_temp$length * super_temp$LR_in_100_perc
  
  super_temp$LRL_in_25_perc_4 <- super_temp$length * super_temp$LR_in_25_perc_4
  super_temp$LRL_in_50_perc_4 <- super_temp$length * super_temp$LR_in_50_perc_4
  super_temp$LRL_in_75_perc_4 <- super_temp$length * super_temp$LR_in_75_perc_4
  super_temp$LRL_in_100_perc_4 <- super_temp$length * super_temp$LR_in_100_perc_4
  
  only_MR$LR_no_10_100[i] <- sum(super_temp$LR_in_10_perc)
  only_MR$LR_no_20_100[i] <- sum(super_temp$LR_in_20_perc)
  only_MR$LR_no_30_100[i] <- sum(super_temp$LR_in_30_perc)
  only_MR$LR_no_40_100[i] <- sum(super_temp$LR_in_40_perc)
  only_MR$LR_no_50_100[i] <- sum(super_temp$LR_in_50_perc)
  only_MR$LR_no_60_100[i] <- sum(super_temp$LR_in_60_perc)
  only_MR$LR_no_70_100[i] <- sum(super_temp$LR_in_70_perc)
  only_MR$LR_no_80_100[i] <- sum(super_temp$LR_in_80_perc)
  only_MR$LR_no_90_100[i] <- sum(super_temp$LR_in_90_perc)
  only_MR$LR_no_100_100[i] <- sum(super_temp$LR_in_100_perc)

  only_MR$LR_no_25_4[i] <- sum(super_temp$LR_in_25_perc_4)
  only_MR$LR_no_50_4[i] <- sum(super_temp$LR_in_50_perc_4)
  only_MR$LR_no_75_4[i] <- sum(super_temp$LR_in_75_perc_4)
  only_MR$LR_no_100_4[i] <- sum(super_temp$LR_in_100_perc_4)

  
  only_MR$LRL_10_100[i] <- sum(super_temp$LRL_in_10_perc)
  only_MR$LRL_20_100[i] <- sum(super_temp$LRL_in_20_perc)
  only_MR$LRL_30_100[i] <- sum(super_temp$LRL_in_30_perc)
  only_MR$LRL_40_100[i] <- sum(super_temp$LRL_in_40_perc)
  only_MR$LRL_50_100[i] <- sum(super_temp$LRL_in_50_perc)
  only_MR$LRL_60_100[i] <- sum(super_temp$LRL_in_60_perc)
  only_MR$LRL_70_100[i] <- sum(super_temp$LRL_in_70_perc)
  only_MR$LRL_80_100[i] <- sum(super_temp$LRL_in_80_perc)
  only_MR$LRL_90_100[i] <- sum(super_temp$LRL_in_90_perc)
  only_MR$LRL_100_100[i] <- sum(super_temp$LRL_in_100_perc)
  
  only_MR$LRL_25_4[i] <- sum(super_temp$LRL_in_25_perc_4)
  only_MR$LRL_50_4[i] <- sum(super_temp$LRL_in_50_perc_4)
  only_MR$LRL_75_4[i] <- sum(super_temp$LRL_in_75_perc_4)
  only_MR$LRL_100_4[i] <- sum(super_temp$LRL_in_100_perc_4)

  super_temp$momentum <- (super_temp$insertion_position * super_temp$length)
  all_momentum <- sum(super_temp$momentum)
  all_length <- sum(super_temp$length)
  only_MR$CoG[i] <- (all_momentum / all_length)
}

let’s have a look if all looks good:

head(only_MR)

then- we need to fuse all the main roots that dont own LR

Make sure that the data structure is the same as for the only_MR file

head(no_LR_MR)

no_LR_MR <- no_LR_MR[,c(1:4)]

add all the columns that are missing

no_LR_MR$Apical <- 0
no_LR_MR$Branched <- 0
no_LR_MR$Basal <- 0

no_LR_MR$Apical_perc <- 0
no_LR_MR$Branched_perc <- 0
no_LR_MR$Basal_perc <- 0
no_LR_MR$LR_no_10_100 <- 0
no_LR_MR$LR_no_20_100 <- 0
no_LR_MR$LR_no_30_100 <- 0
no_LR_MR$LR_no_40_100 <- 0
no_LR_MR$LR_no_50_100 <- 0
no_LR_MR$LR_no_60_100 <- 0
no_LR_MR$LR_no_70_100 <- 0
no_LR_MR$LR_no_80_100 <- 0
no_LR_MR$LR_no_90_100 <- 0
no_LR_MR$LR_no_100_100 <- 0

no_LR_MR$LR_no_25_4 <- 0
no_LR_MR$LR_no_50_4 <- 0
no_LR_MR$LR_no_75_4 <- 0
no_LR_MR$LR_no_100_4 <- 0

no_LR_MR$LRL_10_100 <- 0
no_LR_MR$LRL_20_100 <- 0
no_LR_MR$LRL_30_100 <- 0
no_LR_MR$LRL_40_100 <- 0
no_LR_MR$LRL_50_100 <- 0
no_LR_MR$LRL_60_100 <- 0
no_LR_MR$LRL_70_100 <- 0
no_LR_MR$LRL_80_100 <- 0
no_LR_MR$LRL_90_100 <- 0
no_LR_MR$LRL_100_100 <- 0

no_LR_MR$LRL_25_4 <- 0
no_LR_MR$LRL_50_4 <- 0
no_LR_MR$LRL_75_4 <- 0
no_LR_MR$LRL_100_4 <- 0

no_LR_MR$CoG <- 0

finally - let’s fuse the two files together:

all_MR <- rbind(only_MR, no_LR_MR)
head(all_MR)

tail(all_MR)

matching the data to genotype & treatment information

All the root ID and genotype information is linked in the folder AllMarks_DataInSegment/*_010_DataSegments.csv files

let-s import them first

coding_1 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G1_010_DataSegments.csv")
coding_1$experiment <- "G1"
coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G2_010_DataSegments.csv")
coding_2$experiment <- "G2"
all_coding <- rbind(coding_1, coding_2)

coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G3_010_DataSegments.csv")
coding_2$experiment <- "G3"
all_coding <- rbind(all_coding, coding_2)

coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G4_010_DataSegments.csv")
coding_2$experiment <- "G4"
all_coding <- rbind(all_coding, coding_2)

coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G5_010_DataSegments.csv")
coding_2$experiment <- "G5"
all_coding <- rbind(all_coding, coding_2)

coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G6_010_DataSegments.csv")
coding_2$experiment <- "G6"
all_coding <- rbind(all_coding, coding_2)

coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G7_010_DataSegments.csv")
coding_2$experiment <- "G7"
all_coding <- rbind(all_coding, coding_2)

coding_2 <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/for_Branching_analysis/AllMarks_DataInSegment/G8_010_DataSegments.csv")
coding_2$experiment <- "G8"
all_coding <- rbind(all_coding, coding_2)

Then - let’s have a look at the files:

head(all_coding)

we basically need image name, root, Genotype and Treatment collumns to be able to link it to our existing data in MR_only

colnames(all_coding)

##  [1] "X"                  "image"              "source"            
##  [4] "root"               "root_name"          "mark_type"         
##  [7] "position_from_base" "diameter"           "angle"             
## [10] "x"                  "y"                  "root_order"        
## [13] "root_ontology"      "value"              "Date"              
## [16] "Day"                "Plate"              "ontology"          
## [19] "Root"               "Genotype"           "Treatment"         
## [22] "experiment"

all_coding2 <- all_coding[,c(2,4,20:22)]
head(all_coding2)

all_coding3 <- unique(all_coding2)
all_coding3

OK - now let’s match the all_coding3 file with all_MR file into one:

uncoded_total <- merge(all_coding3, all_MR, by=c("image", "root", "experiment"))
head(uncoded_total)

Let’s check if there is anything missing

dim(all_coding3)

## [1] 14736     5

dim(all_MR)

## [1] 1360   39

dim(uncoded_total)

## [1] 1360   41

Now - lets uncode the genotypes

geno_coding <- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Data_Safarina/GlobalRootData/20191031_geno_coding.csv")
geno_coding$genotypes <- gsub(" ", "", geno_coding$genotypes, fixed = TRUE)
head(geno_coding)

and fuse the genotype information into the entire table:

colnames(uncoded_total)[4] <- "genotypes"

final_decoded <- merge(geno_coding, uncoded_total, by="genotypes", all=T)
head(final_decoded)

write.csv(final_decoded, "LR_distribution_final_set.csv", row.names = F)

Visualize the population trends

let’s visualize the population wide trend:

AP_histo <- gghistogram(final_decoded, x = "Apical",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Apical Zone length (cm)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

AP_histo

BZ_histo <- gghistogram(final_decoded, x = "Branched",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Branched Zone length (cm)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

BZ_histo

BS_histo <- gghistogram(final_decoded, x = "Basal",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Basal Zone length (cm)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

BS_histo

now - let’s have a look at how these trends look when we calculate all the zones as % of the MR:

AP_perc_histo <- gghistogram(final_decoded, x = "Apical_perc",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Apical Zone length (fraction of MRL)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

AP_perc_histo

BZ_perc_histo <- gghistogram(final_decoded, x = "Branched_perc",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Branched Zone length (fraction of MRL)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

BZ_perc_histo

BS_perc_histo <- gghistogram(final_decoded, x = "Basal_perc",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Basal Zone length (fraction of MRL)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

BS_perc_histo

Let’s have a look at Center of Gravity

CoG_histo <- gghistogram(final_decoded, x = "CoG",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Center of Gravity (position on MR in cm)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

CoG_histo

how about if we look at CoG as % of MR

final_decoded$CoG_perc <- final_decoded$CoG/final_decoded$length

CoG_perc_histo <- gghistogram(final_decoded, x = "CoG_perc",
            add = "mean", rug = TRUE,
            color = "Treatment", fill = "Treatment",
            palette = c("#00AFBB", "#E7B800")) + xlab("Center of Gravity (fraction of MR length)")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

CoG_perc_histo

## Warning: Removed 2 rows containing non-finite values (stat_bin).

Cool - all of the above graphs look pretty cool - so let’s combine them into two separatee figures:

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.18_histo_zones_cm.pdf", width=8, height=6)

plot_grid(AP_histo, BZ_histo, BS_histo, CoG_histo,
          ncol = 2, align = "hv", labels=c("AUTO"), 
          label_size = 14)
dev.off()

## quartz_off_screen 
##                 2

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.19_histo_zones_perc.pdf", width=8, height=6)

plot_grid(AP_perc_histo, BZ_perc_histo, BS_perc_histo, CoG_perc_histo,
          ncol = 2, align = "hv", labels=c("AUTO"), 
          label_size = 14)

## Warning: Removed 2 rows containing non-finite values (stat_bin).

dev.off()

## quartz_off_screen 
##                 2

Question - is the correlation between CoG at Control and Salt Stress?

Let’s reshape the data and have a look at the correlations between accessions:

25 % MR split

Then - how do we visualize LR distribution? Maybe - let’s try to melt the dataset containing only the LR distribution - let’s first try with MR / 4 distribution of LR #:

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

new_temp <- final_decoded[,c(2,6:7,25:28)]
head(new_temp)

colnames(new_temp)[4] <- "0-25%"
colnames(new_temp)[5] <- "25-50%"
colnames(new_temp)[6] <- "50-75%"
colnames(new_temp)[7] <- "75-100%"

mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)

LRno_25_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment", 
                        facet.by = "variable", ncol=4,  palette = c("#00AFBB", "#E7B800"),
                        desc_stat = "mean_sd", add = "jitter", 
                        add.params = list(color = "darkgray"),
                        xlab="", ylab="# LR per MR", title = "fraction of MR") 

LRno_25_perc_plot <- LRno_25_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM", 
                                                              label = "p.signif", hide.ns = T)
LRno_25_perc_plot

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

new_temp <- final_decoded[,c(2,6:7,39:42)]
head(new_temp)

colnames(new_temp)[4] <- "0-25%"
colnames(new_temp)[5] <- "25-50%"
colnames(new_temp)[6] <- "50-75%"
colnames(new_temp)[7] <- "75-100%"
mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)

LRL_25_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment", 
                        facet.by = "variable", ncol=4,  palette = c("#00AFBB", "#E7B800"),
                        desc_stat = "mean_sd", add = "jitter", 
                        add.params = list(color = "darkgray"),
                        xlab="", ylab="Lateral Root Length (cm)", title = "fraction of MR") 

LRL_25_perc_plot <- LRL_25_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM", 
                                                              label = "p.signif", hide.ns = T)
LRL_25_perc_plot

10 % MR split

OK - these graphs look pretty cool and make sense - lets have a look at how it looks in 10% fractions of MR:

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

new_temp <- final_decoded[,c(2,6:7,15:24)]
head(new_temp)

colnames(new_temp)[4] <- "0-10%"
colnames(new_temp)[5] <- "10-20%"
colnames(new_temp)[6] <- "20-30%"
colnames(new_temp)[7] <- "30-40%"
colnames(new_temp)[8] <- "40-50%"
colnames(new_temp)[9] <- "50-60%"
colnames(new_temp)[10] <- "60-70%"
colnames(new_temp)[11] <- "70-80%"
colnames(new_temp)[12] <- "80-90%"
colnames(new_temp)[13] <- "90-100%"

mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)

LRno_10_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment", 
                        facet.by = "variable", ncol=10,  palette = c("#00AFBB", "#E7B800"),
                        desc_stat = "mean_sd", add = "jitter", 
                        add.params = list(color = "darkgray"),
                        xlab="", ylab="# LR per MR", title = "fraction of MR") 

LRno_10_perc_plot <- LRno_10_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM", 
                                                              label = "p.signif", hide.ns = T)+ rotate_x_text(90)
LRno_10_perc_plot

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

new_temp <- final_decoded[,c(2,6:7,29:38)]
head(new_temp)

colnames(new_temp)[4] <- "0-10%"
colnames(new_temp)[5] <- "10-20%"
colnames(new_temp)[6] <- "20-30%"
colnames(new_temp)[7] <- "30-40%"
colnames(new_temp)[8] <- "40-50%"
colnames(new_temp)[9] <- "50-60%"
colnames(new_temp)[10] <- "60-70%"
colnames(new_temp)[11] <- "70-80%"
colnames(new_temp)[12] <- "80-90%"
colnames(new_temp)[13] <- "90-100%"
mtemp <- melt(new_temp, id=c("Identifier", "experiment", "Treatment"))
head(mtemp)

LRL_10_perc_plot <- ggerrorplot(mtemp, y = "value", x = "Treatment", fill="Treatment", color="Treatment", 
                        facet.by = "variable", ncol=10,  palette = c("#00AFBB", "#E7B800"),
                        desc_stat = "mean_sd", add = "jitter", 
                        add.params = list(color = "darkgray"),
                        xlab="", ylab="Lateral Root Length (cm)", title = "fraction of MR") 

LRL_10_perc_plot <- LRL_10_perc_plot + rremove("legend") + stat_compare_means(method="t.test", ref.group = "0mM", 
                                                              label = "p.signif", hide.ns = T) + rotate_x_text(90)
LRL_10_perc_plot

Cool - again - above graphs look pretty cool - so let’s combine them into one figure:

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.20_LR_distribution_25perc.pdf", width=8, height=8)

plot_grid(LRno_25_perc_plot, LRL_25_perc_plot,
          ncol = 1, align = "hv", labels=c("AUTO"), 
          label_size = 14)
dev.off()

## quartz_off_screen 
##                 2

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.21_LR_distribution_10perc.pdf", width=10, height=8)

plot_grid(LRno_10_perc_plot, LRL_10_perc_plot,
          ncol = 1, align = "hv", labels=c("AUTO"), 
          label_size = 14)
dev.off()

## quartz_off_screen 
##                 2

Examining the variance across the experiments / genotypes

grand.means <- aggregate(CoG ~ Treatment, data = final_decoded, FUN = mean)
grand.means

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

CoG_p_exp <- ggerrorplot(final_decoded, y = "CoG", x = "experiment", fill="experiment", color="experiment", 
                        facet.by = "Treatment", ncol=1,
                        desc_stat = "mean_sd", add = "jitter", 
                        add.params = list(color = "darkgray"),
                        xlab="Experiment", ylab="Center of Gravity (MR cm)") 
CoG_p_exp <- CoG_p_exp + geom_hline(
                          data = grand.means, aes(yintercept = CoG),
                          linetype = 2,
                          group = "Treatment")
CoG_p_exp <- CoG_p_exp + rremove("legend") + stat_compare_means(method="t.test", ref.group = ".all.", 
                                                              label = "p.signif", hide.ns = T)
CoG_p_exp

Since we have already done this for MR traits - I am not really going into more depth in here - I think the differences we see are mainly due to composition of accessions rather than some random effect

Comparing accession performance accross individual experiments:

We have to first subset things per experiment:

final1 <- subset(final_decoded, final_decoded$experiment == "G1")
final2 <- subset(final_decoded, final_decoded$experiment == "G2")
final3 <- subset(final_decoded, final_decoded$experiment == "G3")
final4 <- subset(final_decoded, final_decoded$experiment == "G4")
final5 <- subset(final_decoded, final_decoded$experiment == "G5")
final6 <- subset(final_decoded, final_decoded$experiment == "G6")
final7 <- subset(final_decoded, final_decoded$experiment == "G7")
final8 <- subset(final_decoded, final_decoded$experiment == "G8")

grand.means <- aggregate(CoG ~ Treatment, data = final1, FUN = mean)
CoG_rep_exp1 <- ggerrorplot(final1, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                           facet.by = c("Treatment"), ncol=1,
                           desc_stat = "mean_sd", add = "jitter", 
                           add.params = list(color = "darkgray"),
                           xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp1 <- CoG_rep_exp1 + stat_compare_means(method="t.test", 
                                                ref.group = ".all.", label = "p.signif", 
                                                hide.ns = T) + rremove("legend")
CoG_rep_exp1 <- CoG_rep_exp1 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                    linetype = 2,
                                    group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp1

grand.means <- aggregate(CoG ~ Treatment, data = final2, FUN = mean)
CoG_rep_exp2 <- ggerrorplot(final2, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp2 <- CoG_rep_exp2 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp2 <- CoG_rep_exp2 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp2

grand.means <- aggregate(CoG ~ Treatment, data = final3, FUN = mean)
CoG_rep_exp3 <- ggerrorplot(final3, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp3 <- CoG_rep_exp3 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp3 <- CoG_rep_exp3 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp3

## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations

## Warning: Removed 1 rows containing missing values (geom_segment).

grand.means <- aggregate(CoG ~ Treatment, data = final4, FUN = mean)
CoG_rep_exp4 <- ggerrorplot(final4, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp4 <- CoG_rep_exp4 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp4 <- CoG_rep_exp4 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp4

grand.means <- aggregate(CoG ~ Treatment, data = final5, FUN = mean)
CoG_rep_exp5 <- ggerrorplot(final5, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp5 <- CoG_rep_exp5 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp5 <- CoG_rep_exp5 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp5

grand.means <- aggregate(CoG ~ Treatment, data = final6, FUN = mean)
CoG_rep_exp6 <- ggerrorplot(final6, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp6 <- CoG_rep_exp6 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp6 <- CoG_rep_exp6 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp6

grand.means <- aggregate(CoG ~ Treatment, data = final7, FUN = mean)
CoG_rep_exp7 <- ggerrorplot(final7, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp7 <- CoG_rep_exp7 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp7 <- CoG_rep_exp7 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp7

grand.means <- aggregate(CoG ~ Treatment, data = final8, FUN = mean)
CoG_rep_exp8 <- ggerrorplot(final8, y = "CoG", x = "Identifier", fill="Identifier", color = "Identifier",
                            facet.by = c("Treatment"), ncol=1,
                            desc_stat = "mean_sd", add = "jitter", 
                            add.params = list(color = "darkgray"),
                            xlab="", ylab="Center of Gravity (cm of MR)") 
CoG_rep_exp8 <- CoG_rep_exp8 + stat_compare_means(method="t.test", 
                                                  ref.group = ".all.", label = "p.signif", 
                                                  hide.ns = T) + rremove("legend")
CoG_rep_exp8 <- CoG_rep_exp8 + geom_hline(data = grand.means, aes(yintercept = CoG),
                                          linetype = 2,
                                          group = "Treatment") + rotate_x_text() + font("xy.text", size = 8)
CoG_rep_exp8

## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations

## Warning: Removed 1 rows containing missing values (geom_segment).

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.22_All_acc_check_CoG_per_exp.pdf", width=16, height=23)

plot_grid(CoG_rep_exp1, CoG_rep_exp2, CoG_rep_exp3, CoG_rep_exp4, CoG_rep_exp5, CoG_rep_exp6,
          CoG_rep_exp7, CoG_rep_exp8,
          ncol = 2, align = "hv", labels=c("AUTO"), 
          label_size = 14)

## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations

## Warning: Removed 1 rows containing missing values (geom_segment).

## Warning: Computation failed in `stat_compare_means()`:
## not enough 'x' observations

## Warning: Removed 1 rows containing missing values (geom_segment).

dev.off()

## quartz_off_screen 
##                 2

I also don’t thing that visualizing the distribution of LR in these kind of graphs make much sense. Thus…. Let’s summarize the data per accession and then have a look at correlation between the traits and between conditions

Summary statistics

Just like in previous notebook - I would like to remove one genotype called “T102964” it is only screened in exp. GA6 and consist only of one plant scored at salt while no plants were scored at control conditions

final_decoded$Identifier <- as.character(final_decoded$Identifier)
dim(final_decoded)

## [1] 1360   44

bye <- c("T102964 ")

final_decoded <- subset(final_decoded, !(final_decoded$Identifier %in% bye))
dim(final_decoded)

## [1] 1355   44

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

unique(final_decoded$Identifier)

##   [1] "CR001 "               "CR077 "               "CR078 "              
##   [4] "CR079 "               "CR081 "               "CR082 "              
##   [7] "CR083 "               "CR084 "               "CR085 "              
##  [10] "CR086 "               "CR088 "               "CR028 "              
##  [13] "LA0147 "              "CR049 "               "CR056 "              
##  [16] "CR058 "               "CR064 "               "CR073 "              
##  [19] "CR075 "               "CR076 "               "CR089 "              
##  [22] "CR103 "               "CR104 "               "CR105 "              
##  [25] "CR106 "               "CR107 "               "CR108 "              
##  [28] "CR114 "               "CR110 "               "CR115 "              
##  [31] "CR117 "               "CR092 "               "CR288 "              
##  [34] "CR093 "               "CR095 "               "CR097 "              
##  [37] "CR098 "               "CR099 "               "CR100 "              
##  [40] "CR101 "               "CR118 "               "CR144 "              
##  [43] "CR145 "               "CR147 "               "CR148 "              
##  [46] "CR149 "               "CR152 "               "CR153 "              
##  [49] "CR154 "               "CR155 "               "CR156 "              
##  [52] "CR120 "               "CR122 "               "CR123 "              
##  [55] "CR124 "               "CR125 "               "CR130 "              
##  [58] "CR141 "               "CR142 "               "CR157 "              
##  [61] "CR205 "               "CR236 "               "CR238 "              
##  [64] "CR240 "               "CR241 "               "CR242 "              
##  [67] "CR246 "               "CR248 "               "CR250 "              
##  [70] "CR258 "               "CR158 "               "CR161 "              
##  [73] "CR166 "               "CR179 "               "CR181 "              
##  [76] "CR199 "               "CR202 "               "CR203 "              
##  [79] "CR263 "               "CR287 "               "CR289 "              
##  [82] "CR290 "               "CR291 "               "CR292 "              
##  [85] "CR293 "               "T102068 "             "T102950 "            
##  [88] "T102957 "             "CR264 "               "CR271 "              
##  [91] "CR274 "               "CR275 "               "CR276 "              
##  [94] "CR282 "               "CR283 "               "CR286 "              
##  [97] "T102958 "             "T102968 "             "T102969 "            
## [100] "T102970 "             "T102971 "             "T102972 "            
## [103] "T102973 "             "T102974 "             "T102975 "            
## [106] "T102977 "             "T102978 "             "T102959 "            
## [109] "T102961 "             "T102962 "             "T102963 "            
## [112] "T102965 "             "T102966 "             "T102967 "            
## [115] "T102979 "             "BGV5895 "             "BGV6071 "            
## [118] "BGV6767 "             "BGV6769 "             "BGV6827 "            
## [121] "BGV6931 "             "BGV7942 "             "BGV7979 "            
## [124] "BGV8036 "             "BGV8096 "             "T102980 "            
## [127] "T102982 "             "T102983 "             "T102984 "            
## [130] "BGV12627 "            "BGV13157 "            "BGV13161 "           
## [133] "BGV13945 "            "BGV8109 "             "BGV8219 "            
## [136] "BGV8221 "             "BGV8228 "             "BGV8230 "            
## [139] "Ferum "               "Stupicke Polni Rane "

now - lets calculate mean trait value for each accession for all of our traits

colnames(final_decoded)

##  [1] "genotypes"     "Identifier"    "Accession"     "image"        
##  [5] "root"          "experiment"    "Treatment"     "length"       
##  [9] "Apical"        "Branched"      "Basal"         "Apical_perc"  
## [13] "Branched_perc" "Basal_perc"    "LR_no_10_100"  "LR_no_20_100" 
## [17] "LR_no_30_100"  "LR_no_40_100"  "LR_no_50_100"  "LR_no_60_100" 
## [21] "LR_no_70_100"  "LR_no_80_100"  "LR_no_90_100"  "LR_no_100_100"
## [25] "LR_no_25_4"    "LR_no_50_4"    "LR_no_75_4"    "LR_no_100_4"  
## [29] "LRL_10_100"    "LRL_20_100"    "LRL_30_100"    "LRL_40_100"   
## [33] "LRL_50_100"    "LRL_60_100"    "LRL_70_100"    "LRL_80_100"   
## [37] "LRL_90_100"    "LRL_100_100"   "LRL_25_4"      "LRL_50_4"     
## [41] "LRL_75_4"      "LRL_100_4"     "CoG"           "CoG_perc"

geno.means <- summaryBy(Apical + Branched + Basal + Apical_perc + Branched_perc + Basal_perc + LR_no_10_100 + LR_no_20_100 + LR_no_30_100 + LR_no_40_100 + LR_no_50_100 + LR_no_60_100 + LR_no_70_100 + LR_no_80_100 + LR_no_90_100 + LR_no_100_100 + LR_no_25_4 + LR_no_50_4 + LR_no_75_4 + LR_no_100_4 + LRL_10_100 + LRL_20_100 + LRL_30_100 + LRL_40_100 + LRL_50_100 + LRL_60_100 + LRL_70_100 + LRL_80_100 + LRL_90_100 + LRL_100_100 + LRL_25_4 + LRL_50_4 + LRL_75_4 + LRL_100_4 + CoG + CoG_perc  ~ Treatment + Identifier, data = final_decoded)

geno.means.c <- subset(geno.means, geno.means$Treatment == "0mM")
head(geno.means.c)

geno.means.s <- subset(geno.means, geno.means$Treatment == "125mM")
head(geno.means.s)

Let’s use only the informative collumns and re-name them into “Control” > c and “Salt” > s:

colnames(geno.means.c) <- gsub(".mean", ".C", colnames(geno.means.c))
colnames(geno.means.s) <- gsub(".mean", ".S", colnames(geno.means.s))

colnames(geno.means.c)

##  [1] "Treatment"       "Identifier"      "Apical.C"        "Branched.C"     
##  [5] "Basal.C"         "Apical_perc.C"   "Branched_perc.C" "Basal_perc.C"   
##  [9] "LR_no_10_100.C"  "LR_no_20_100.C"  "LR_no_30_100.C"  "LR_no_40_100.C" 
## [13] "LR_no_50_100.C"  "LR_no_60_100.C"  "LR_no_70_100.C"  "LR_no_80_100.C" 
## [17] "LR_no_90_100.C"  "LR_no_100_100.C" "LR_no_25_4.C"    "LR_no_50_4.C"   
## [21] "LR_no_75_4.C"    "LR_no_100_4.C"   "LRL_10_100.C"    "LRL_20_100.C"   
## [25] "LRL_30_100.C"    "LRL_40_100.C"    "LRL_50_100.C"    "LRL_60_100.C"   
## [29] "LRL_70_100.C"    "LRL_80_100.C"    "LRL_90_100.C"    "LRL_100_100.C"  
## [33] "LRL_25_4.C"      "LRL_50_4.C"      "LRL_75_4.C"      "LRL_100_4.C"    
## [37] "CoG.C"           "CoG_perc.C"

colnames(geno.means.s)

##  [1] "Treatment"       "Identifier"      "Apical.S"        "Branched.S"     
##  [5] "Basal.S"         "Apical_perc.S"   "Branched_perc.S" "Basal_perc.S"   
##  [9] "LR_no_10_100.S"  "LR_no_20_100.S"  "LR_no_30_100.S"  "LR_no_40_100.S" 
## [13] "LR_no_50_100.S"  "LR_no_60_100.S"  "LR_no_70_100.S"  "LR_no_80_100.S" 
## [17] "LR_no_90_100.S"  "LR_no_100_100.S" "LR_no_25_4.S"    "LR_no_50_4.S"   
## [21] "LR_no_75_4.S"    "LR_no_100_4.S"   "LRL_10_100.S"    "LRL_20_100.S"   
## [25] "LRL_30_100.S"    "LRL_40_100.S"    "LRL_50_100.S"    "LRL_60_100.S"   
## [29] "LRL_70_100.S"    "LRL_80_100.S"    "LRL_90_100.S"    "LRL_100_100.S"  
## [33] "LRL_25_4.S"      "LRL_50_4.S"      "LRL_75_4.S"      "LRL_100_4.S"    
## [37] "CoG.S"           "CoG_perc.S"

geno.means.c <- geno.means.c[,c(2:38)]
geno.means.s <- geno.means.s[,c(2:38)]

Then - we fuse the avg trait values from both conditions together in one file and calculate STI for individual traits of interest:

STI <- merge(geno.means.c, geno.means.s, by="Identifier")
head(STI)

colnames(STI)

##  [1] "Identifier"      "Apical.C"        "Branched.C"      "Basal.C"        
##  [5] "Apical_perc.C"   "Branched_perc.C" "Basal_perc.C"    "LR_no_10_100.C" 
##  [9] "LR_no_20_100.C"  "LR_no_30_100.C"  "LR_no_40_100.C"  "LR_no_50_100.C" 
## [13] "LR_no_60_100.C"  "LR_no_70_100.C"  "LR_no_80_100.C"  "LR_no_90_100.C" 
## [17] "LR_no_100_100.C" "LR_no_25_4.C"    "LR_no_50_4.C"    "LR_no_75_4.C"   
## [21] "LR_no_100_4.C"   "LRL_10_100.C"    "LRL_20_100.C"    "LRL_30_100.C"   
## [25] "LRL_40_100.C"    "LRL_50_100.C"    "LRL_60_100.C"    "LRL_70_100.C"   
## [29] "LRL_80_100.C"    "LRL_90_100.C"    "LRL_100_100.C"   "LRL_25_4.C"     
## [33] "LRL_50_4.C"      "LRL_75_4.C"      "LRL_100_4.C"     "CoG.C"          
## [37] "CoG_perc.C"      "Apical.S"        "Branched.S"      "Basal.S"        
## [41] "Apical_perc.S"   "Branched_perc.S" "Basal_perc.S"    "LR_no_10_100.S" 
## [45] "LR_no_20_100.S"  "LR_no_30_100.S"  "LR_no_40_100.S"  "LR_no_50_100.S" 
## [49] "LR_no_60_100.S"  "LR_no_70_100.S"  "LR_no_80_100.S"  "LR_no_90_100.S" 
## [53] "LR_no_100_100.S" "LR_no_25_4.S"    "LR_no_50_4.S"    "LR_no_75_4.S"   
## [57] "LR_no_100_4.S"   "LRL_10_100.S"    "LRL_20_100.S"    "LRL_30_100.S"   
## [61] "LRL_40_100.S"    "LRL_50_100.S"    "LRL_60_100.S"    "LRL_70_100.S"   
## [65] "LRL_80_100.S"    "LRL_90_100.S"    "LRL_100_100.S"   "LRL_25_4.S"     
## [69] "LRL_50_4.S"      "LRL_75_4.S"      "LRL_100_4.S"     "CoG.S"          
## [73] "CoG_perc.S"

# OK - let's think which traits make sense to calculate STI? 
# Zones

STI$Apical.STI <- STI$Apical.S/STI$Apical.C
STI$Branched.STI <- STI$Branched.S/STI$Branched.C
STI$Basal.STI <- STI$Basal.S/STI$Basal.C

STI$Apical_perc.STI <- STI$Apical_perc.S/STI$Apical_perc.C
STI$Branched_perc.STI <- STI$Branched_perc.S/STI$Branched_perc.C
STI$Basal_perc.STI <- STI$Basal_perc.S/STI$Basal_perc.C

# CoG cm and CoG as percentage
STI$CoG.STI <- STI$CoG.S/STI$CoG.C
STI$CoG_perc.STI <- STI$CoG_perc.S/STI$CoG_perc.C


head(STI)

# Let's have a look how these graphs look like:

# Apical Zone
my_line <- median(STI$Apical.STI)
STI_graph_Apical <- ggplot(STI, aes(x = Identifier, y = Apical.STI))
STI_graph_Apical <- STI_graph_Apical + geom_bar(stat = "identity")
STI_graph_Apical <- STI_graph_Apical + xlab("") + ylab("Fraction of Control (Apical zone length)")
STI_graph_Apical <- STI_graph_Apical + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Apical <- STI_graph_Apical + theme_classic()
STI_graph_Apical <- STI_graph_Apical + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Apical

# Branched Zone
my_line <- median(STI$Branched.STI)
STI_graph_Branched <- ggplot(STI, aes(x = Identifier, y = Branched.STI))
STI_graph_Branched <- STI_graph_Branched + geom_bar(stat = "identity")
STI_graph_Branched <- STI_graph_Branched + xlab("") + ylab("Fraction of Control (Branched zone length)")
STI_graph_Branched <- STI_graph_Branched + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Branched <- STI_graph_Branched + theme_classic()
STI_graph_Branched <- STI_graph_Branched + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Branched

# Basal Zone
my_line <- median(STI$Basal.STI)
STI_graph_Basal <- ggplot(STI, aes(x = Identifier, y = Basal.STI))
STI_graph_Basal <- STI_graph_Basal + geom_bar(stat = "identity")
STI_graph_Basal <- STI_graph_Basal + xlab("") + ylab("Fraction of Control (Basal zone length)")
STI_graph_Basal <- STI_graph_Basal + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Basal <- STI_graph_Basal + theme_classic()
STI_graph_Basal <- STI_graph_Basal + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Basal

# Center of Gravity
my_line <- median(STI$CoG.STI)
STI_graph_CoG <- ggplot(STI, aes(x = Identifier, y = CoG.STI))
STI_graph_CoG <- STI_graph_CoG + geom_bar(stat = "identity")
STI_graph_CoG <- STI_graph_CoG + xlab("") + ylab("Fraction of Control (Center of Gravity)")
STI_graph_CoG <- STI_graph_CoG + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_CoG <- STI_graph_CoG + theme_classic()
STI_graph_CoG <- STI_graph_CoG + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_CoG

I think it would be good to combine this data with the MR data analyzed previously - and see how the changes in zonation and CoG are comparing with the absolute seedling’s root size

MR_data <- read.csv("GWAS_ready.csv")
head(MR_data)

MR_data$class <- ""

for(i in 1:nrow(MR_data)){
  if(MR_data$TRS_STI[i] < 0.5109159){
    MR_data$class[i] <- "super-bad"}
  if(MR_data$TRS_STI[i] > 0.5109159 & MR_data$TRS_STI[i] < 0.6815476){
    MR_data$class[i] <- "bad"}
  if(MR_data$TRS_STI[i] > 0.6815476 & MR_data$TRS_STI[i] < 0.8521793){
    MR_data$class[i] <- "good"}
  if(MR_data$TRS_STI[i] > 0.8521793){
    MR_data$class[i] <- "kick-ass"}
}
head(MR_data)

MR_data <- MR_data[,c(2:38)]

colnames(MR_data)[1] <- "Identifier"
all <- merge(MR_data, STI)
head(all)

all$class <- factor(all$class, levels=c("super-bad", "bad", "good", "kick-ass"))

all$Identifier = with(all, reorder(Identifier, TRS_STI, median))

Then - let’s re-plot again all the graphs from above:

# Apical Zone
my_line <- median(all$Apical.STI)
STI_graph_Apical <- ggplot(all, aes(x = Identifier, y = Apical.STI, fill = class))
STI_graph_Apical <- STI_graph_Apical + geom_bar(stat = "identity")
STI_graph_Apical <- STI_graph_Apical + xlab("") + ylab("Fraction of Control (Apical zone length)")
STI_graph_Apical <- STI_graph_Apical + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Apical <- STI_graph_Apical + theme_classic()
STI_graph_Apical <- STI_graph_Apical + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Apical

# Branched Zone
my_line <- median(all$Branched.STI)
STI_graph_Branched <- ggplot(all, aes(x = Identifier, y = Branched.STI, fill = class))
STI_graph_Branched <- STI_graph_Branched + geom_bar(stat = "identity")
STI_graph_Branched <- STI_graph_Branched + xlab("") + ylab("Fraction of Control (Branched zone length)")
STI_graph_Branched <- STI_graph_Branched + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Branched <- STI_graph_Branched + theme_classic()
STI_graph_Branched <- STI_graph_Branched + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Branched

# Basal Zone
my_line <- median(all$Basal.STI)
STI_graph_Basal <- ggplot(all, aes(x = Identifier, y = Basal.STI, fill = class))
STI_graph_Basal <- STI_graph_Basal + geom_bar(stat = "identity")
STI_graph_Basal <- STI_graph_Basal + xlab("") + ylab("Fraction of Control (Basal zone length)")
STI_graph_Basal <- STI_graph_Basal + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_Basal <- STI_graph_Basal + theme_classic()
STI_graph_Basal <- STI_graph_Basal + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_Basal

# Center of Gravity
my_line <- median(all$CoG.STI)
STI_graph_CoG <- ggplot(all, aes(x = Identifier, y = CoG.STI, fill = class))
STI_graph_CoG <- STI_graph_CoG + geom_bar(stat = "identity")
STI_graph_CoG <- STI_graph_CoG + xlab("") + ylab("Fraction of Control (Center of Gravity)")
STI_graph_CoG <- STI_graph_CoG + geom_hline(yintercept = my_line, linetype = 2, color = "lightgray")
STI_graph_CoG <- STI_graph_CoG + theme_classic()
STI_graph_CoG <- STI_graph_CoG + theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
STI_graph_CoG

Let’s pull all these graphs into one figure too:

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.23_SIT_of_branching.pdf", width=16, height=23)

plot_grid(STI_graph_Apical, STI_graph_Branched, STI_graph_Basal, STI_graph_CoG,
          ncol = 2, align = "hv", labels=c("AUTO"), 
          label_size = 14)
dev.off()

## quartz_off_screen 
##                 2

Correlations

Global correlations - making a graph bigger than life ;)

colnames(all)

##   [1] "Identifier"        "MRL_C"             "STR_C"            
##   [4] "noLR_C"            "LRD_C"             "LRL_C"            
##   [7] "aLRL_C"            "DIR_C"             "BasZ_C"           
##  [10] "BraZ_C"            "ApZ_C"             "aLRLpMRL_C"       
##  [13] "TRS_C"             "BasZpMRL_C"        "BraZpMRL_C"       
##  [16] "ApZpMRL_C"         "MRL_S"             "STR_S"            
##  [19] "noLR_S"            "LRD_S"             "LRL_S"            
##  [22] "aLRL_S"            "DIR_S"             "BasZ_S"           
##  [25] "BraZ_S"            "ApZ_S"             "aLRLpMRL_S"       
##  [28] "TRS_S"             "BasZpMRL_S"        "BraZpMRL_S"       
##  [31] "ApZpMRL_S"         "MRL_STI"           "LRL_STI"          
##  [34] "noLR_STI"          "aLRL_STI"          "TRS_STI"          
##  [37] "class"             "Apical.C"          "Branched.C"       
##  [40] "Basal.C"           "Apical_perc.C"     "Branched_perc.C"  
##  [43] "Basal_perc.C"      "LR_no_10_100.C"    "LR_no_20_100.C"   
##  [46] "LR_no_30_100.C"    "LR_no_40_100.C"    "LR_no_50_100.C"   
##  [49] "LR_no_60_100.C"    "LR_no_70_100.C"    "LR_no_80_100.C"   
##  [52] "LR_no_90_100.C"    "LR_no_100_100.C"   "LR_no_25_4.C"     
##  [55] "LR_no_50_4.C"      "LR_no_75_4.C"      "LR_no_100_4.C"    
##  [58] "LRL_10_100.C"      "LRL_20_100.C"      "LRL_30_100.C"     
##  [61] "LRL_40_100.C"      "LRL_50_100.C"      "LRL_60_100.C"     
##  [64] "LRL_70_100.C"      "LRL_80_100.C"      "LRL_90_100.C"     
##  [67] "LRL_100_100.C"     "LRL_25_4.C"        "LRL_50_4.C"       
##  [70] "LRL_75_4.C"        "LRL_100_4.C"       "CoG.C"            
##  [73] "CoG_perc.C"        "Apical.S"          "Branched.S"       
##  [76] "Basal.S"           "Apical_perc.S"     "Branched_perc.S"  
##  [79] "Basal_perc.S"      "LR_no_10_100.S"    "LR_no_20_100.S"   
##  [82] "LR_no_30_100.S"    "LR_no_40_100.S"    "LR_no_50_100.S"   
##  [85] "LR_no_60_100.S"    "LR_no_70_100.S"    "LR_no_80_100.S"   
##  [88] "LR_no_90_100.S"    "LR_no_100_100.S"   "LR_no_25_4.S"     
##  [91] "LR_no_50_4.S"      "LR_no_75_4.S"      "LR_no_100_4.S"    
##  [94] "LRL_10_100.S"      "LRL_20_100.S"      "LRL_30_100.S"     
##  [97] "LRL_40_100.S"      "LRL_50_100.S"      "LRL_60_100.S"     
## [100] "LRL_70_100.S"      "LRL_80_100.S"      "LRL_90_100.S"     
## [103] "LRL_100_100.S"     "LRL_25_4.S"        "LRL_50_4.S"       
## [106] "LRL_75_4.S"        "LRL_100_4.S"       "CoG.S"            
## [109] "CoG_perc.S"        "Apical.STI"        "Branched.STI"     
## [112] "Basal.STI"         "Apical_perc.STI"   "Branched_perc.STI"
## [115] "Basal_perc.STI"    "CoG.STI"           "CoG_perc.STI"

all_m <- as.matrix(all[,c(2:36,38:117)])

corr_A <- cor(all_m)
res1_A <- cor.mtest(all_m)
diag(corr_A) = NA
diag(res1_A$p) = NA


pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.24_Correlation_all.pdf", height = 20, width= 20)
corrplot(corr_A, type = "upper", p.mat = res1_A$p, insig = "label_sig",
         sig.level = c(.001, .01, .05), pch.cex = .9, pch.col = "black",
         na.label = "x", method = "color", tl.col = "black", tl.pos = "tp",
         tl.cex = 0.7, col=brewer.pal(n=8, name="PuOr"), mar = c(1, 1, 1, 1))
dev.off()

## quartz_off_screen 
##                 2

Let’s have a look at the correlation between the most interesting traits based on our understanding and interest ;)

# Performance based on TRS with the CoG performance

inter_corr <- ggscatter(all, x= "TRS_STI", y="CoG.STI", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "TRS @ Salt / TRS @ Control", ylab = "CoG @ Salt / CoG @ Control")
inter_corr <- inter_corr + stat_cor() + rremove("legend")
inter_corr

## `geom_smooth()` using formula 'y ~ x'

# OK - maybe above is too far fetched - lets have a look with CoG @ Salt
inter_corr <- ggscatter(all, x= "TRS_STI", y="CoG.S", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "TRS @ Salt / TRS @ Control", ylab = "CoG @ Salt")
inter_corr <- inter_corr + stat_cor() + rremove("legend")
inter_corr

## `geom_smooth()` using formula 'y ~ x'

# OK - we are starting to getting somewhere - we have a slight indication that LOWER center of gravity is corresponding to larger root system at salt stress
inter_corr <- ggscatter(all, x= "TRS_S", y="CoG.S", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "TRS @ Salt", ylab = "CoG @ Salt")
inter_corr <- inter_corr + stat_cor() + rremove("legend")
inter_corr

## `geom_smooth()` using formula 'y ~ x'

# Do we then see an opposite relation with Apical zone? >> NOPE

inter_corr2 <- ggscatter(all, x= "TRS_S", y="Apical.S", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "TRS @ Salt (cm)", ylab = "Apical zone length @ Salt (cm)")
inter_corr2 <- inter_corr2 + stat_cor() + rremove("legend")
inter_corr2

## `geom_smooth()` using formula 'y ~ x'

# How about Branched zone? >> YES!

inter_corr2 <- ggscatter(all, x= "TRS_S", y="Branched.S", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "TRS @ Salt (cm)", ylab = "Branched zone length @ Salt (cm)")
inter_corr2 <- inter_corr2 + stat_cor() + rremove("legend")
inter_corr2

## `geom_smooth()` using formula 'y ~ x'

# How about Basal zone? >> YES!

inter_corr3 <- ggscatter(all, x= "TRS_S", y="Basal.S", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "TRS @ Salt (cm)", ylab = "Basal zone length @ Salt (cm)")
inter_corr3 <- inter_corr3 + stat_cor() + rremove("legend")
inter_corr3

## `geom_smooth()` using formula 'y ~ x'

# And how is the Center of Gravity correlated between the CoG @ Control and @ Salt

inter_corr4 <- ggscatter(all, x= "CoG.C", y="CoG.S", 
                        add = "reg.line", conf.int = F, palette = "jco", 
                        xlab = "CoG @ Control (cm MR)", ylab = "CoG @ Salt (cm MR)")
inter_corr4 <- inter_corr4 + stat_cor() + rremove("legend")
inter_corr4

## `geom_smooth()` using formula 'y ~ x'

Let’s combine all these graphs into figure too:

pdf("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Magda_pheno_analysis/Fig.25_Correlation_most interesting traits.pdf", height = 10, width= 10)
plot_grid(inter_corr, inter_corr2, inter_corr3, inter_corr4,
          ncol = 2, align = "hv", labels=c("AUTO"), 
          label_size = 14)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

dev.off()

## quartz_off_screen 
##                 2

Prepare data for GWAS

There was previously an issue with some genotypes that were not in the genotyping file. So let’s re-evaluate which Identifiers to include ;)

old_GWAS<- read.csv("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/GWAS_Try1/Albert_Pheno.csv")
head(old_GWAS)

colnames(old_GWAS)[1] <- "Identifier"

what_you_want <- old_GWAS$Identifier

all$Identifier <- gsub(" ", "", all$Identifier)

all$Identifier %in% what_you_want

##   [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [13]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [37]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [49]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [73]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [85]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [97]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [109]  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

dim(all)

## [1] 138 117

all_final <- subset(all, all$Identifier %in% what_you_want)
dim(all_final)

## [1] 135 117

colnames(all_final)

##   [1] "Identifier"        "MRL_C"             "STR_C"            
##   [4] "noLR_C"            "LRD_C"             "LRL_C"            
##   [7] "aLRL_C"            "DIR_C"             "BasZ_C"           
##  [10] "BraZ_C"            "ApZ_C"             "aLRLpMRL_C"       
##  [13] "TRS_C"             "BasZpMRL_C"        "BraZpMRL_C"       
##  [16] "ApZpMRL_C"         "MRL_S"             "STR_S"            
##  [19] "noLR_S"            "LRD_S"             "LRL_S"            
##  [22] "aLRL_S"            "DIR_S"             "BasZ_S"           
##  [25] "BraZ_S"            "ApZ_S"             "aLRLpMRL_S"       
##  [28] "TRS_S"             "BasZpMRL_S"        "BraZpMRL_S"       
##  [31] "ApZpMRL_S"         "MRL_STI"           "LRL_STI"          
##  [34] "noLR_STI"          "aLRL_STI"          "TRS_STI"          
##  [37] "class"             "Apical.C"          "Branched.C"       
##  [40] "Basal.C"           "Apical_perc.C"     "Branched_perc.C"  
##  [43] "Basal_perc.C"      "LR_no_10_100.C"    "LR_no_20_100.C"   
##  [46] "LR_no_30_100.C"    "LR_no_40_100.C"    "LR_no_50_100.C"   
##  [49] "LR_no_60_100.C"    "LR_no_70_100.C"    "LR_no_80_100.C"   
##  [52] "LR_no_90_100.C"    "LR_no_100_100.C"   "LR_no_25_4.C"     
##  [55] "LR_no_50_4.C"      "LR_no_75_4.C"      "LR_no_100_4.C"    
##  [58] "LRL_10_100.C"      "LRL_20_100.C"      "LRL_30_100.C"     
##  [61] "LRL_40_100.C"      "LRL_50_100.C"      "LRL_60_100.C"     
##  [64] "LRL_70_100.C"      "LRL_80_100.C"      "LRL_90_100.C"     
##  [67] "LRL_100_100.C"     "LRL_25_4.C"        "LRL_50_4.C"       
##  [70] "LRL_75_4.C"        "LRL_100_4.C"       "CoG.C"            
##  [73] "CoG_perc.C"        "Apical.S"          "Branched.S"       
##  [76] "Basal.S"           "Apical_perc.S"     "Branched_perc.S"  
##  [79] "Basal_perc.S"      "LR_no_10_100.S"    "LR_no_20_100.S"   
##  [82] "LR_no_30_100.S"    "LR_no_40_100.S"    "LR_no_50_100.S"   
##  [85] "LR_no_60_100.S"    "LR_no_70_100.S"    "LR_no_80_100.S"   
##  [88] "LR_no_90_100.S"    "LR_no_100_100.S"   "LR_no_25_4.S"     
##  [91] "LR_no_50_4.S"      "LR_no_75_4.S"      "LR_no_100_4.S"    
##  [94] "LRL_10_100.S"      "LRL_20_100.S"      "LRL_30_100.S"     
##  [97] "LRL_40_100.S"      "LRL_50_100.S"      "LRL_60_100.S"     
## [100] "LRL_70_100.S"      "LRL_80_100.S"      "LRL_90_100.S"     
## [103] "LRL_100_100.S"     "LRL_25_4.S"        "LRL_50_4.S"       
## [106] "LRL_75_4.S"        "LRL_100_4.S"       "CoG.S"            
## [109] "CoG_perc.S"        "Apical.STI"        "Branched.STI"     
## [112] "Basal.STI"         "Apical_perc.STI"   "Branched_perc.STI"
## [115] "Basal_perc.STI"    "CoG.STI"           "CoG_perc.STI"

Let’s add more traits to this final table and re-categorize class into four separate categories:

all_final$class_bad <- all_final$class == "bad"
all_final$class_bad <- as.integer(as.logical(all_final$class_bad))

all_final$class_superbad <- all_final$class == "super-bad"
all_final$class_superbad <- as.integer(as.logical(all_final$class_superbad))

all_final$class_good <- all_final$class == "good"
all_final$class_good <- as.integer(as.logical(all_final$class_good))

all_final$class_kickass <- all_final$class == "kick-ass"
all_final$class_kickass <- as.integer(as.logical(all_final$class_kickass))

colnames(all_final)

##   [1] "Identifier"        "MRL_C"             "STR_C"            
##   [4] "noLR_C"            "LRD_C"             "LRL_C"            
##   [7] "aLRL_C"            "DIR_C"             "BasZ_C"           
##  [10] "BraZ_C"            "ApZ_C"             "aLRLpMRL_C"       
##  [13] "TRS_C"             "BasZpMRL_C"        "BraZpMRL_C"       
##  [16] "ApZpMRL_C"         "MRL_S"             "STR_S"            
##  [19] "noLR_S"            "LRD_S"             "LRL_S"            
##  [22] "aLRL_S"            "DIR_S"             "BasZ_S"           
##  [25] "BraZ_S"            "ApZ_S"             "aLRLpMRL_S"       
##  [28] "TRS_S"             "BasZpMRL_S"        "BraZpMRL_S"       
##  [31] "ApZpMRL_S"         "MRL_STI"           "LRL_STI"          
##  [34] "noLR_STI"          "aLRL_STI"          "TRS_STI"          
##  [37] "class"             "Apical.C"          "Branched.C"       
##  [40] "Basal.C"           "Apical_perc.C"     "Branched_perc.C"  
##  [43] "Basal_perc.C"      "LR_no_10_100.C"    "LR_no_20_100.C"   
##  [46] "LR_no_30_100.C"    "LR_no_40_100.C"    "LR_no_50_100.C"   
##  [49] "LR_no_60_100.C"    "LR_no_70_100.C"    "LR_no_80_100.C"   
##  [52] "LR_no_90_100.C"    "LR_no_100_100.C"   "LR_no_25_4.C"     
##  [55] "LR_no_50_4.C"      "LR_no_75_4.C"      "LR_no_100_4.C"    
##  [58] "LRL_10_100.C"      "LRL_20_100.C"      "LRL_30_100.C"     
##  [61] "LRL_40_100.C"      "LRL_50_100.C"      "LRL_60_100.C"     
##  [64] "LRL_70_100.C"      "LRL_80_100.C"      "LRL_90_100.C"     
##  [67] "LRL_100_100.C"     "LRL_25_4.C"        "LRL_50_4.C"       
##  [70] "LRL_75_4.C"        "LRL_100_4.C"       "CoG.C"            
##  [73] "CoG_perc.C"        "Apical.S"          "Branched.S"       
##  [76] "Basal.S"           "Apical_perc.S"     "Branched_perc.S"  
##  [79] "Basal_perc.S"      "LR_no_10_100.S"    "LR_no_20_100.S"   
##  [82] "LR_no_30_100.S"    "LR_no_40_100.S"    "LR_no_50_100.S"   
##  [85] "LR_no_60_100.S"    "LR_no_70_100.S"    "LR_no_80_100.S"   
##  [88] "LR_no_90_100.S"    "LR_no_100_100.S"   "LR_no_25_4.S"     
##  [91] "LR_no_50_4.S"      "LR_no_75_4.S"      "LR_no_100_4.S"    
##  [94] "LRL_10_100.S"      "LRL_20_100.S"      "LRL_30_100.S"     
##  [97] "LRL_40_100.S"      "LRL_50_100.S"      "LRL_60_100.S"     
## [100] "LRL_70_100.S"      "LRL_80_100.S"      "LRL_90_100.S"     
## [103] "LRL_100_100.S"     "LRL_25_4.S"        "LRL_50_4.S"       
## [106] "LRL_75_4.S"        "LRL_100_4.S"       "CoG.S"            
## [109] "CoG_perc.S"        "Apical.STI"        "Branched.STI"     
## [112] "Basal.STI"         "Apical_perc.STI"   "Branched_perc.STI"
## [115] "Basal_perc.STI"    "CoG.STI"           "CoG_perc.STI"     
## [118] "class_bad"         "class_superbad"    "class_good"       
## [121] "class_kickass"

remove all unneccessary columns:

for_GWAS <- all_final[,c(1:36,38:121)]

then - add some more columns, because there is always more traits I can come up with :)

for_GWAS$LR_no_fract_10_100.C <- for_GWAS$LR_no_10_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_20_100.C <- for_GWAS$LR_no_20_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_30_100.C <- for_GWAS$LR_no_30_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_40_100.C <- for_GWAS$LR_no_40_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_50_100.C <- for_GWAS$LR_no_50_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_60_100.C <- for_GWAS$LR_no_60_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_70_100.C <- for_GWAS$LR_no_70_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_80_100.C <- for_GWAS$LR_no_80_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_90_100.C <- for_GWAS$LR_no_90_100.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_100_100.C <- for_GWAS$LR_no_100_100.C / for_GWAS$noLR_C

for_GWAS$LR_no_fract_10_100.S <- for_GWAS$LR_no_10_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_20_100.S <- for_GWAS$LR_no_20_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_30_100.S <- for_GWAS$LR_no_30_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_40_100.S <- for_GWAS$LR_no_40_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_50_100.S <- for_GWAS$LR_no_50_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_60_100.S <- for_GWAS$LR_no_60_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_70_100.S <- for_GWAS$LR_no_70_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_80_100.S <- for_GWAS$LR_no_80_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_90_100.S <- for_GWAS$LR_no_90_100.S / for_GWAS$noLR_S
for_GWAS$LR_no_fract_100_100.S <- for_GWAS$LR_no_100_100.S / for_GWAS$noLR_S

for_GWAS$LR_no_fract_25_4.C <- for_GWAS$LR_no_25_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_50_4.C <- for_GWAS$LR_no_50_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_75_4.C <- for_GWAS$LR_no_75_4.C / for_GWAS$noLR_C
for_GWAS$LR_no_fract_100_4.C <- for_GWAS$LR_no_100_4.C / for_GWAS$noLR_C

for_GWAS$LR_no_fract_25_4.S <- for_GWAS$LR_no_25_4.S / for_GWAS$noLR_C
for_GWAS$LR_no_fract_50_4.S <- for_GWAS$LR_no_50_4.S / for_GWAS$noLR_C
for_GWAS$LR_no_fract_75_4.S <- for_GWAS$LR_no_75_4.S / for_GWAS$noLR_C
for_GWAS$LR_no_fract_100_4.S <- for_GWAS$LR_no_100_4.S / for_GWAS$noLR_C

# LRL same

for_GWAS$LRL_fract_10_100.C <- for_GWAS$LRL_10_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_20_100.C <- for_GWAS$LRL_20_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_30_100.C <- for_GWAS$LRL_30_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_40_100.C <- for_GWAS$LRL_40_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_50_100.C <- for_GWAS$LRL_50_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_60_100.C <- for_GWAS$LRL_60_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_70_100.C <- for_GWAS$LRL_70_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_80_100.C <- for_GWAS$LRL_80_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_90_100.C <- for_GWAS$LRL_90_100.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_100_100.C <- for_GWAS$LRL_100_100.C / for_GWAS$LRL_C

for_GWAS$LRL_fract_10_100.S <- for_GWAS$LRL_10_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_20_100.S <- for_GWAS$LRL_20_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_30_100.S <- for_GWAS$LRL_30_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_40_100.S <- for_GWAS$LRL_40_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_50_100.S <- for_GWAS$LRL_50_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_60_100.S <- for_GWAS$LRL_60_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_70_100.S <- for_GWAS$LRL_70_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_80_100.S <- for_GWAS$LRL_80_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_90_100.S <- for_GWAS$LRL_90_100.S / for_GWAS$LRL_S
for_GWAS$LRL_fract_100_100.S <- for_GWAS$LRL_100_100.S / for_GWAS$LRL_S

for_GWAS$LRL_fract_25_4.C <- for_GWAS$LRL_25_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_50_4.C <- for_GWAS$LRL_50_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_75_4.C <- for_GWAS$LRL_75_4.C / for_GWAS$LRL_C
for_GWAS$LRL_fract_100_4.C <- for_GWAS$LRL_100_4.C / for_GWAS$LRL_C

for_GWAS$LRL_fract_25_4.S <- for_GWAS$LRL_25_4.S / for_GWAS$LRL_C
for_GWAS$LRL_fract_50_4.S <- for_GWAS$LRL_50_4.S / for_GWAS$LRL_C
for_GWAS$LRL_fract_75_4.S <- for_GWAS$LRL_75_4.S / for_GWAS$LRL_C
for_GWAS$LRL_fract_100_4.S <- for_GWAS$LRL_100_4.S / for_GWAS$LRL_C

OK - NOW we aree ultimately DONE :)))

Let’s save it, but we need to have ecot_ID as numbers only

Lets see how it worked last time with files from Arabidopsis script:

decoding_1 <- read.table("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Arabidopsis script GWAS/data files/df.traitgwa.txt", header=T)

decoding_2 <- read.table("/Users/magdalena/Dropbox/DataAndAnalysis/collaborations/Safarina Christa tomato GWAS/Arabidopsis script GWAS/data files/df.traitgwa.better.txt", header=T)

head(decoding_1)

head(decoding_2)

decoding_1 <- decoding_1$genotype
decoding_2 <- decoding_2$genotype
decoding_2

##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135

for_GWAS$ecot_ID <- decoding_2
colnames(for_GWAS)

##   [1] "Identifier"            "MRL_C"                 "STR_C"                
##   [4] "noLR_C"                "LRD_C"                 "LRL_C"                
##   [7] "aLRL_C"                "DIR_C"                 "BasZ_C"               
##  [10] "BraZ_C"                "ApZ_C"                 "aLRLpMRL_C"           
##  [13] "TRS_C"                 "BasZpMRL_C"            "BraZpMRL_C"           
##  [16] "ApZpMRL_C"             "MRL_S"                 "STR_S"                
##  [19] "noLR_S"                "LRD_S"                 "LRL_S"                
##  [22] "aLRL_S"                "DIR_S"                 "BasZ_S"               
##  [25] "BraZ_S"                "ApZ_S"                 "aLRLpMRL_S"           
##  [28] "TRS_S"                 "BasZpMRL_S"            "BraZpMRL_S"           
##  [31] "ApZpMRL_S"             "MRL_STI"               "LRL_STI"              
##  [34] "noLR_STI"              "aLRL_STI"              "TRS_STI"              
##  [37] "Apical.C"              "Branched.C"            "Basal.C"              
##  [40] "Apical_perc.C"         "Branched_perc.C"       "Basal_perc.C"         
##  [43] "LR_no_10_100.C"        "LR_no_20_100.C"        "LR_no_30_100.C"       
##  [46] "LR_no_40_100.C"        "LR_no_50_100.C"        "LR_no_60_100.C"       
##  [49] "LR_no_70_100.C"        "LR_no_80_100.C"        "LR_no_90_100.C"       
##  [52] "LR_no_100_100.C"       "LR_no_25_4.C"          "LR_no_50_4.C"         
##  [55] "LR_no_75_4.C"          "LR_no_100_4.C"         "LRL_10_100.C"         
##  [58] "LRL_20_100.C"          "LRL_30_100.C"          "LRL_40_100.C"         
##  [61] "LRL_50_100.C"          "LRL_60_100.C"          "LRL_70_100.C"         
##  [64] "LRL_80_100.C"          "LRL_90_100.C"          "LRL_100_100.C"        
##  [67] "LRL_25_4.C"            "LRL_50_4.C"            "LRL_75_4.C"           
##  [70] "LRL_100_4.C"           "CoG.C"                 "CoG_perc.C"           
##  [73] "Apical.S"              "Branched.S"            "Basal.S"              
##  [76] "Apical_perc.S"         "Branched_perc.S"       "Basal_perc.S"         
##  [79] "LR_no_10_100.S"        "LR_no_20_100.S"        "LR_no_30_100.S"       
##  [82] "LR_no_40_100.S"        "LR_no_50_100.S"        "LR_no_60_100.S"       
##  [85] "LR_no_70_100.S"        "LR_no_80_100.S"        "LR_no_90_100.S"       
##  [88] "LR_no_100_100.S"       "LR_no_25_4.S"          "LR_no_50_4.S"         
##  [91] "LR_no_75_4.S"          "LR_no_100_4.S"         "LRL_10_100.S"         
##  [94] "LRL_20_100.S"          "LRL_30_100.S"          "LRL_40_100.S"         
##  [97] "LRL_50_100.S"          "LRL_60_100.S"          "LRL_70_100.S"         
## [100] "LRL_80_100.S"          "LRL_90_100.S"          "LRL_100_100.S"        
## [103] "LRL_25_4.S"            "LRL_50_4.S"            "LRL_75_4.S"           
## [106] "LRL_100_4.S"           "CoG.S"                 "CoG_perc.S"           
## [109] "Apical.STI"            "Branched.STI"          "Basal.STI"            
## [112] "Apical_perc.STI"       "Branched_perc.STI"     "Basal_perc.STI"       
## [115] "CoG.STI"               "CoG_perc.STI"          "class_bad"            
## [118] "class_superbad"        "class_good"            "class_kickass"        
## [121] "LR_no_fract_10_100.C"  "LR_no_fract_20_100.C"  "LR_no_fract_30_100.C" 
## [124] "LR_no_fract_40_100.C"  "LR_no_fract_50_100.C"  "LR_no_fract_60_100.C" 
## [127] "LR_no_fract_70_100.C"  "LR_no_fract_80_100.C"  "LR_no_fract_90_100.C" 
## [130] "LR_no_fract_100_100.C" "LR_no_fract_10_100.S"  "LR_no_fract_20_100.S" 
## [133] "LR_no_fract_30_100.S"  "LR_no_fract_40_100.S"  "LR_no_fract_50_100.S" 
## [136] "LR_no_fract_60_100.S"  "LR_no_fract_70_100.S"  "LR_no_fract_80_100.S" 
## [139] "LR_no_fract_90_100.S"  "LR_no_fract_100_100.S" "LR_no_fract_25_4.C"   
## [142] "LR_no_fract_50_4.C"    "LR_no_fract_75_4.C"    "LR_no_fract_100_4.C"  
## [145] "LR_no_fract_25_4.S"    "LR_no_fract_50_4.S"    "LR_no_fract_75_4.S"   
## [148] "LR_no_fract_100_4.S"   "LRL_fract_10_100.C"    "LRL_fract_20_100.C"   
## [151] "LRL_fract_30_100.C"    "LRL_fract_40_100.C"    "LRL_fract_50_100.C"   
## [154] "LRL_fract_60_100.C"    "LRL_fract_70_100.C"    "LRL_fract_80_100.C"   
## [157] "LRL_fract_90_100.C"    "LRL_fract_100_100.C"   "LRL_fract_10_100.S"   
## [160] "LRL_fract_20_100.S"    "LRL_fract_30_100.S"    "LRL_fract_40_100.S"   
## [163] "LRL_fract_50_100.S"    "LRL_fract_60_100.S"    "LRL_fract_70_100.S"   
## [166] "LRL_fract_80_100.S"    "LRL_fract_90_100.S"    "LRL_fract_100_100.S"  
## [169] "LRL_fract_25_4.C"      "LRL_fract_50_4.C"      "LRL_fract_75_4.C"     
## [172] "LRL_fract_100_4.C"     "LRL_fract_25_4.S"      "LRL_fract_50_4.S"     
## [175] "LRL_fract_75_4.S"      "LRL_fract_100_4.S"     "ecot_ID"

for_GWAS2 <- for_GWAS[,c(177, 1:176)]
for_GWAS_only <- for_GWAS[,c(177,2:176)]

write.csv(for_GWAS2, "ultimate_GWAS_Safi.csv", row.names = F)
write.csv(for_GWAS_only, "ultimate_GWAS_only_Safi.csv", row.names = F)

Analysis of lateral root data from Safarina’s tomato experiment