Load libraries

library(reshape2)
library(ggplot2)
library(cowplot)
library(moments)
library(plyr)
library(dplyr)

Phenotype data process and transformation

Field canopy (temperature and other traits)

Load data

### Load entry 
DB <- read.csv("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/01_Canopy/DIV_BLUPS_12Days-Canopy.csv", 
               stringsAsFactors = FALSE, header = T, row.names = 1)
nrow(DB)
## [1] 382
head(DB)
Time <- c("temp_189","temp_196", "temp_204", "temp_210", "temp_217", "temp_224", 
          "temp_231","temp_238", "temp_245", "temp_253", "temp_259", "temp_267", 
          "temp_33_189", "temp_33_196", "temp_33_204", "temp_33_210", "temp_33_217", "temp_33_224", 
          "temp_33_231","temp_33_238", "temp_33_245", "temp_33_253", "temp_33_259", "temp_33_267",
          "temp_10_189", "temp_10_196", "temp_10_204", "temp_10_210", "temp_10_217", "temp_10_224", 
          "temp_10_231","temp_10_238", "temp_10_245", "temp_10_253", "temp_10_259", "temp_10_267",
          "CTD_189", "CTD_196", "CTD_204", "CTD_210", "CTD_217", "CTD_224", 
          "CTD_231","CTD_238", "CTD_245", "CTD_253", "CTD_259", "CTD_267")

Plot all raw data and transformed data together

for (i in 1:length(Time)){

  temp.df <- DB[, grep(paste0(Time[i],"_"), colnames(DB), value = F)]
  temp.df$Accession <- rownames(temp.df)
  melt.df <- melt(temp.df, id.vars = "Accession", 
     variable.name = "condition", value.name = "Sum.Area")
  ### Add the calculation
  melt.df$Log10.Sum.area <- log10(abs(melt.df$Sum.Area))
  melt.df$Sqrt.Sum.area <- sqrt(abs(melt.df$Sum.Area))
  
  ### Calculate the mean for each group
  temp.mean1 <- ddply(melt.df %>% na.omit(), "condition", summarise, grp.mean=mean(Sum.Area))
  temp.mean2 <- ddply(melt.df %>% na.omit(), "condition", summarise, grp.mean.log=mean(Log10.Sum.area))
  temp.mean3 <- ddply(melt.df %>% na.omit(), "condition", summarise, grp.mean.sqrt=mean(Sqrt.Sum.area))
  
 A <-  ggplot(melt.df %>% filter(!grepl("both", condition, ignore.case = TRUE)) %>% na.omit(), 
              aes(x=Sum.Area, color = condition, fill = condition)) +
       geom_histogram(aes(y=..density..),alpha = 0.8)+
       scale_fill_manual(values=c("lightblue", "lightpink"))+
       scale_color_manual(values=c("darkblue", "red")) +
       theme_classic() +
       theme(legend.position = "bottom") +
       geom_density(alpha = 0.2)
 
 
  B  <-  ggplot(melt.df %>% filter(!grepl("both", condition, ignore.case = TRUE)) %>% na.omit(), 
              aes(x=Log10.Sum.area, color = condition, fill = condition)) +
       geom_histogram(aes(y=..density..),alpha = 0.8)+
       scale_fill_manual(values=c("lightblue", "lightpink"))+
       scale_color_manual(values=c("darkblue", "red")) +
       theme_classic() +
       theme(legend.position = "bottom") +
       geom_density(alpha = 0.2)
  
    C  <-  ggplot(melt.df %>% filter(!grepl("both", condition, ignore.case = TRUE)) %>% na.omit(), 
              aes(x=Sqrt.Sum.area, color = condition, fill = condition)) +
       geom_histogram(aes(y=..density..),alpha = 0.8)+
       scale_fill_manual(values=c("lightblue", "lightpink"))+
       scale_color_manual(values=c("darkblue", "red")) +
       theme_classic() +
       theme(legend.position = "bottom") +
       geom_density(alpha = 0.2)

   
   D <- ggplot(melt.df %>% filter(grepl("both", condition, ignore.case = TRUE)) %>% na.omit(), 
              aes(x=Sum.Area, color = condition, fill = condition)) +
       geom_histogram(aes(y=..density..),alpha = 0.8)+
       scale_fill_manual(values=c("grey70"))+
       scale_color_manual(values=c("grey30")) +
       theme_classic() +
       theme(legend.position = "bottom") +
       geom_density(alpha = 0.2)
   
   E <-  ggplot(melt.df %>% filter(grepl("both", condition, ignore.case = TRUE)) %>% na.omit(), 
              aes(x=Log10.Sum.area, color = condition, fill = condition)) +
       geom_histogram(aes(y=..density..),alpha = 0.8)+
       scale_fill_manual(values=c("grey70"))+
       scale_color_manual(values=c("grey30")) +
       theme_classic() +
       theme(legend.position = "bottom") +
       geom_density(alpha = 0.2)
   
   G <- ggplot(melt.df %>% filter(grepl("both", condition, ignore.case = TRUE)) %>% na.omit(), 
              aes(x=Sqrt.Sum.area, color = condition, fill = condition)) +
       geom_histogram(aes(y=..density..),alpha = 0.8)+
       scale_fill_manual(values=c("grey70"))+
       scale_color_manual(values=c("grey30")) +
       theme_classic() +
       theme(legend.position = "bottom") +
       geom_density(alpha = 0.2)
   
   ### Combine all plots for each day
   pdf(paste0("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/01_Canopy/02_Pheno-Nomality/Canopy_",Time[i], ".pdf"), 
       height = 10, width = 15)
   
    print(plot_grid(A, B, C, D, E, G, labels = c('A', 'B', 'C', 'D', 'E', 'G'), label_size = 12))
    dev.off()
   
}

Export statistical profile

for (i in 1:length(Time)){
  
  temp.df <- DB[, grep(paste0(Time[i],"_"), colnames(DB), value = F)]
  temp.df$Accession <- rownames(temp.df)

    ### Calculate the transformation
   temp.df$WL.sqrt <- sqrt(abs(temp.df[,1]))
   temp.df$WW.sqrt <- sqrt(abs(temp.df[,2]))
   temp.df$WL.log10 <- log10(abs(temp.df[,1]))
   temp.df$WW.log10 <- log10(abs(temp.df[,2]))
   temp.df$both.log10 <- log10(abs(temp.df[,3]))
   temp.df$both.sqrt <- sqrt(abs(temp.df[,3]))
   
   temp.clean <- temp.df[, colnames(temp.df) != "Accession"]
  
    ### Create an empty dataframe with specified row numbers and column names
    sum.df <- data.frame(matrix(nrow = 9, ncol = 7))
    colnames(sum.df) <- c("term", "shapiro.test(P)", "skewness", "kurtosis",
                    "Mean", "Media", "Delta")
    
    for (x in 1: nrow(sum.df)){
      sum.df[x,1] <- colnames(temp.clean[,x, drop = F])
      S.test <- shapiro.test(temp.clean[,x] %>% na.omit())
      sum.df[x,2] <- S.test[2]$p.value
      sum.df[x,3] <- skewness(temp.clean[,1] %>% na.omit())
      sum.df[x,4] <- kurtosis(temp.clean[,x] %>% na.omit())
      sum.df[x,5] <- mean(temp.clean[,x] %>% na.omit())
      sum.df[x,6] <- median(temp.clean[,x] %>% na.omit())
      sum.df[x,7] <- sum.df[x,6] - sum.df[x,5]
    }
    
    sum.df$Day <- Time[i]
    assign(paste0("Normal-distribution_statistics_", Time[i]), sum.df)
}

### Combine all list 
Stats_list <- lapply(ls(pattern = "Normal-distribution_statistics_"), get)
Normal_distribution_Stats <- bind_rows(Stats_list)
head(Normal_distribution_Stats, 30)
### Export report files 
write.csv(Normal_distribution_Stats, 
          "~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/01_Canopy/01_Normality-stats-canopy-temperature-daily.csv",quote = F)

Prepare phenotype files for GWAS

FIBER DATA

Reformat the data

data <- read.csv("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/02_Fiber/Seed-Fiber_blups_results-clean.csv", header = F)

fiber_index <- read.csv("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/02_Fiber/Traits/fiber-index.csv", header = T)
fiber_index
base_columns <- c(1, 2)

# Loop through the additional columns and save subtables
for (i in 1:nrow(fiber_index)) {
  # Extract the subtable with base columns and the current additional column
  subtable <- data[, c(base_columns, fiber_index[i,1])]
  
  # Generate the file name dynamically
  file_name <- paste0(fiber_index[i,2], ".txt")
  
  # Save the subtable to a text file
  write.table(subtable, file =     
              paste0("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/02_Fiber/", file_name), 
              sep = "\t",  row.names = FALSE, col.names = FALSE, quote = FALSE)
}

CANOPY DATA

Reformat the data

data.canopy <- read.csv("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/01_Canopy/DIV_BLUPS_12Days-Canopy-clean.csv", header = F)
head(data.canopy)
canopy_index <- read.csv("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/01_Canopy/03_Traits/canopy-index.csv", header = T)
canopy_index
base_columns <- c(1, 2)

# Loop through the additional columns and save subtables
for (i in 1:nrow(canopy_index)) {
  # Extract the subtable with base columns and the current additional column
  subtable <- data.canopy[, c(base_columns, canopy_index[i,1])]
  
  # Generate the file name dynamically
  file_name <- paste0(canopy_index[i,2], ".txt")
  
  # Save the subtable to a text file
  write.table(subtable, file =     
              paste0("~/Library/CloudStorage/Box-Box/Projects/11_DIV/02_Phenotype/01_Canopy/03_Traits/", file_name), 
              sep = "\t",  row.names = FALSE, col.names = FALSE, quote = FALSE)
}