This notebook describes the haplotype analysis of rice loci identified by means of GWAS by Martina Huber, Prof. Ronald Pierink’s group.
library(tidyr)
## Registered S3 methods overwritten by 'tibble':
## method from
## format.tbl pillar
## print.tbl pillar
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library("doBy")
##
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
##
## order_by
library("reshape2")
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggbeeswarm)
library(ggpubr)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:cowplot':
##
## stamp
## The following objects are masked from 'package:reshape2':
##
## colsplit, melt, recast
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
library(multcompView)
First - upload the genotype matrix file:
Locus_A <- read.table("New_LA.GT.FORMAT", header=T)
LA_GT <- Locus_A
head(LA_GT)
We need to get the genotype names translated into the names that correspond with the phenotype file:
decode <- read.csv("new_haplotypes/accession index_HYDRA-ID_for_Magda.csv")
head(decode)
decode <- decode[,c(2:7)]
decode$VCF_better <- gsub("@", ".", decode$VCF_code)
# check how many are overlapping for sanity check
yes_col <- decode$VCF_better %in% colnames(LA_GT)
yes_col
## [1] TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE
## [13] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
## [61] TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
## [277] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [289] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [325] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [337] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
length(yes_col [yes_col == TRUE])
## [1] 324
then - we need to translate the 0/0 and 1/1 into A and T - as all the SNPs in here should be homozygous and bi-allelic
sapply(LA_GT$IRGC121316.c88dcbba.0, class)
## 0/0 1/1 0/0 0/0 1/1
## "character" "character" "character" "character" "character"
head(LA_GT)
unique(LA_GT$IRGC121491)
## [1] "0/0" "./." "1/1"
# current order is
# ./. 0/0 0/1 1/1
# and we need to change it into:
# "NA", "A", "N.A", "T" * mainly because we are not able to do haplotypes on biallelic SNPs - therefore they get classified as "NAs" too
# also - we cannot have two values converging into "NA", so we add "N.A", which we subsequently replace by "NA" in the last line
# therefore we loop it for all collumns:
for(e in 3:ncol(LA_GT)){
LA_GT[,e] <- gsub("0/0", "A", LA_GT[,e])
LA_GT[,e] <- gsub("1/1", "T", LA_GT[,e])
LA_GT[,e] <- gsub("./.", "NA", LA_GT[,e])
}
head(LA_GT)
Then let’s subset into the accessions exclusively present in the phenotyping file:
pheno <- read.csv("new_haplotypes/14traits_SI_norm.csv")
# make sure that the column names are the same
colnames(pheno)
## [1] "index.pots" "HDRA.ID" "NSFTV.ID"
## [4] "IRGC.ID" "Variety" "Subpop"
## [7] "Shoot.area..cm2." "Hull.area..cm2." "Solidity"
## [10] "Perimeter..cm." "Nr.leaves" "Nr.tillers"
## [13] "Plant.height..cm." "Culm.height..cm." "Leaf.length..cm."
## [16] "DW..g." "Leaf.angle..º." "Tiller.angle..º."
## [19] "Erectness..º." "Droopiness..º." "SUM_rank_traits"
## [22] "SUM_norm_traits" "norm_SI_rank" "SI_rank"
# "index.pots" "HDRA.ID" "NSFTV.ID "IRGC.ID" "Variety"
colnames(decode)
## [1] "HDRA.genotype.assay.ID" "NSFTV.ID" "IRGC.ID"
## [4] "Accession.Name" "Subpop" "VCF_code"
## [7] "VCF_better"
# "HDRA.ID" "NSFTV.ID" "IRGC.ID" "Accession.Name"
colnames(decode)[1] <- "HDRA.ID"
head(pheno)
decode
dim(pheno)
## [1] 355 24
dim(decode)
## [1] 343 7
pheno$HDRA.ID %in% decode$HDRA.ID
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [277] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [289] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [325] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [337] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [349] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
pheno$NSFTV.ID %in% decode$NSFTV.ID
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [277] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [289] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [325] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [337] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [349] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
pheno$IRGC.ID %in% decode$IRGC.ID
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [193] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [205] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [229] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [265] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [277] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [289] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [325] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [337] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE
## [349] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Merge two data sets
pheno2 <- merge(decode, pheno, id=c("HDRA.ID", "NSFTV.ID", "IRGC.ID"))
head(pheno2)
dim(pheno)
## [1] 355 24
dim(decode)
## [1] 343 7
pheno2
All looks good! Now - we need to subset the SNP information based on the IDs present in the phenotype file:
pheno_id <- pheno2$VCF_better
unique(pheno_id)
## [1] "NSFTV81.006dfe9b.0" "NSFTV39.014ab0f1.0" "NSFTV132.02cc7c6d.0"
## [4] "NSFTV286.044639de.0" "NSFTV643.0491646d.0" "NSFTV32.068b860d.0"
## [7] "NSFTV358.0764178c.0" "NSFTV275.07dac217.0" "NSFTV269.092f15e7.0"
## [10] "NSFTV256.0be20a08.0" "NSFTV6.0d125c0e.0" "NSFTV324.0defa551.0"
## [13] "NSFTV287.0e787735.0" "NSFTV126.0f6a67da.0" "NSFTV110.11bf5114.0"
## [16] "NSFTV355.122a975b.0" "NSFTV150.128dd425.0" "NSFTV72.12d82364.0"
## [19] "NSFTV311.137628a5.0" "NSFTV235.13ae4f27.0" "NSFTV216.149fb038.0"
## [22] "NSFTV364.14e43b02.0" "NSFTV323.1510e4aa.0" "NSFTV277.1516d75f.0"
## [25] "NSFTV177.15e6c437.0" "NSFTV105.16463092.0" "NSFTV278.17627a92.0"
## [28] "NSFTV145.17c4070a.0" "NSFTV391.180a155f.0" "NSFTV253.1911c363.0"
## [31] "NSFTV163.192e24ab.0" "NSFTV228.195567bf.0" "NSFTV27.1956dd3f.0"
## [34] "NSFTV219.199f4455.0" "NSFTV138.1a946dc6.0" "NSFTV133.1a95985b.0"
## [37] "NSFTV65.1ab838a2.0" "NSFTV25.1ce7093b.0" "NSFTV335.1e26852c.0"
## [40] "NSFTV328.1e4f3933.0" "NSFTV396.1eb5d579.0" "NSFTV100.1f10be3d.0"
## [43] "NSFTV321.1f6f13c3.0" "NSFTV394.1f856ac1.0" "NSFTV94.20a4c97d.0"
## [46] "NSFTV49.21d3f1b3.0" "NSFTV333.22e60af9.0" "NSFTV380.238f25f1.0"
## [49] "NSFTV36.2593e5a7.0" "NSFTV201.263f2baf.0" "NSFTV377.2779bba9.0"
## [52] "NSFTV121.280279b3.0" "NSFTV241.2a7724c1.0" "NSFTV9.2a9d8d47.0"
## [55] "NSFTV162.2b28e441.0" "NSFTV33.2b4e06fe.0" "NSFTV306.2c583eed.0"
## [58] "NSFTV157.2db62b89.0" "NSFTV10.2e1c9c87.0" "NSFTV29.30646147.0"
## [61] "NSFTV8.30c3073f.0" "NSFTV221.311aaf30.0" "NSFTV144.3269952e.0"
## [64] "NSFTV85.328163e1.0" "NSFTV26.32a15c4d.0" "NSFTV96.32a6808e.0"
## [67] "NSFTV234.32ca1759.0" "NSFTV205.348853a5.0" "NSFTV214.34df4fca.0"
## [70] "NSFTV206.37249a74.0" "NSFTV67.39dd7feb.0" "NSFTV119.3aa51818.0"
## [73] "NSFTV385.3b25c24f.0" "NSFTV195.3c10a4b8.0" "NSFTV329.3e58f34c.0"
## [76] "NSFTV143.3ea144c5.0" "NSFTV318.3eb8fab5.0" "NSFTV255.3fb62843.0"
## [79] "NSFTV153.40f343f4.0" "NSFTV651.432387a8.0" "NSFTV17.446f6c62.0"
## [82] "NSFTV243.44aeff79.0" "NSFTV186.4559ff3f.0" "NSFTV397.45d3c920.0"
## [85] "NSFTV77.45ec7861.0" "NSFTV178.480f7505.0" "NSFTV147.48b4f132.0"
## [88] "NSFTV639.48d3bc66.0" "NSFTV273.493f2b7e.0" "NSFTV231.49649a00.0"
## [91] "NSFTV340.4b0a7350.0" "NSFTV345.4e35b58a.0" "NSFTV386.4f4f777a.0"
## [94] "NSFTV161.4f643bf5.0" "NSFTV239.523d5ec4.0" "NSFTV22.52c6e2ba.0"
## [97] "NSFTV112.531e23fa.0" "NSFTV226.536afc14.0" "NSFTV209.5384be83.0"
## [100] "NSFTV265.544f48c0.0" "NSFTV211.5497b233.0" "NSFTV179.5505a767.0"
## [103] "NSFTV5.5533f406.0" "NSFTV74.55d3afae.0" "NSFTV193.5632be21.0"
## [106] "NSFTV252.56f5103e.0" "NSFTV51.57d7feea.0" "NSFTV280.58d5bdc3.0"
## [109] "NSFTV308.593def74.0" "NSFTV282.5aa7968d.0" "NSFTV117.5b144b9c.0"
## [112] "NSFTV390.5c592759.0" "NSFTV92.5df8f871.0" "NSFTV3.5ef1be74.0"
## [115] "NSFTV244.5efe8622.0" "NSFTV258.60c362f1.0" "NSFTV187.60e142c3.0"
## [118] "NSFTV166.61152244.0" "NSFTV53.61b7bf53.0" "NSFTV71.61cc3858.0"
## [121] "NSFTV54.06334433.0" "NSFTV125.63f298ba.0" "NSFTV279.642238d9.0"
## [124] "NSFTV87.64fa0112.0" "NSFTV158.6516a500.0" "NSFTV56.652530bb.0"
## [127] "NSFTV13.660f0236.0" "NSFTV189.661eaeaa.0" "NSFTV78.66d97672.0"
## [130] "NSFTV270.6815ec6d.0" "NSFTV46.68c2ecf8.0" "NSFTV37.69527f35.0"
## [133] "NSFTV98.6ab77e3e.0" "NSFTV20.6af7c9fc.0" "NSFTV283.6b37cb3d.0"
## [136] "NSFTV116.6cedf6aa.0" "NSFTV102.6e26f4cc.0" "NSFTV360.6f068769.0"
## [139] "NSFTV240.6f263bd1.0" "NSFTV184.7119ebee.0" "NSFTV389.7126c359.0"
## [142] "NSFTV123.714ac141.0" "NSFTV118.71bd9426.0" "NSFTV57.71d5fcf4.0"
## [145] "NSFTV164.72497775.0" "NSFTV322.72cef953.0" "NSFTV366.73b20824.0"
## [148] "NSFTV303.73c4fdfb.0" "NSFTV90.743f41c1.0" "NSFTV315.74c9fbbc.0"
## [151] "NSFTV66.7691a376.0" "NSFTV128.76a1efc9.0" "NSFTV40.76f56677.0"
## [154] "NSFTV304.773e969e.0" "NSFTV23.7741d7c1.0" "NSFTV204.778cea6e.0"
## [157] "NSFTV113.7a723d9e.0" "NSFTV107.7b7a0d82.0" "NSFTV84.7bee5b9f.0"
## [160] "NSFTV222.7cfe4a0c.0" "NSFTV44.7d4c6c4e.0" "NSFTV274.7dba928e.0"
## [163] "NSFTV332.7e956f22.0" "NSFTV99.7eb7c6a8.0" "NSFTV60.7f7fb805.0"
## [166] "NSFTV289.7fc911f7.0" "NSFTV142.806c51cc.0" "NSFTV346.80fc89ae.0"
## [169] "NSFTV198.818143d8.0" "NSFTV4.81d03b86.0" "NSFTV305.8288d278.0"
## [172] "NSFTV371.84ad8457.0" "NSFTV347.853e318c.0" "NSFTV140.85551f9c.0"
## [175] "NSFTV281.85959164.0" "NSFTV218.85da3a70.0" "NSFTV104.8629f76c.0"
## [178] "NSFTV137.8653bbdb.0" "NSFTV301.86556239.0" "NSFTV313.872ec0b4.0"
## [181] "NSFTV249.87621330.0" "NSFTV641.88631879.0" "NSFTV250.88e1f162.0"
## [184] "NSFTV89.897c5eef.0" "NSFTV106.8a06320f.0" "NSFTV245.8a41a10a.0"
## [187] "NSFTV350.8adbe877.0" "NSFTV103.8c76404c.0" "NSFTV181.8cd35626.0"
## [190] "NSFTV176.8cfa8fb2.0" "NSFTV45.8d6aded4.0" "NSFTV120.8e6220e5.0"
## [193] "NSFTV167.8e941960.0" "NSFTV129.8fafd383.0" "NSFTV155.9127f236.0"
## [196] "NSFTV217.915c0033.0" "NSFTV156.93387d96.0" "NSFTV199.958ed737.0"
## [199] "NSFTV384.9764b3c8.0" "NSFTV191.97ce6839.0" "NSFTV73.97dcf87d.0"
## [202] "NSFTV334.09869491.0" "NSFTV642.986a3c37.0" "NSFTV172.9ac72c9e.0"
## [205] "NSFTV165.9b2223bb.0" "NSFTV248.9b37187d.0" "NSFTV285.9baa653c.0"
## [208] "NSFTV58.9c099b78.0" "NSFTV93.9f1f4614.0" "NSFTV101.9f782e9e.0"
## [211] "NSFTV341.9f786a86.0" "NSFTV387.a08839ce.0" "NSFTV344.a0f35768.0"
## [214] "NSFTV225.a31b4a06.0" "NSFTV284.a32e0586.0" "NSFTV348.a446becd.0"
## [217] "NSFTV349.a5a6e2dd.0" "NSFTV59.a5ec1b10.0" "NSFTV130.a796716d.0"
## [220] "NSFTV227.a7983b03.0" "NSFTV337.a7bec464.0" "NSFTV141.a8319fc6.0"
## [223] "NSFTV342.ac7a352c.0" "NSFTV267.aceb3352.0" "NSFTV638.ad1a9c05.0"
## [226] "NSFTV326.aeed4684.0" "NSFTV353.af77442b.0" "NSFTV75.b0816082.0"
## [229] "NSFTV242.b21f7528.0" "NSFTV213.b2320902.0" "NSFTV171.b238d197.0"
## [232] "NSFTV291.b29d3bf4.0" "NSFTV339.b3a301ea.0" "NSFTV268.b49ad0db.0"
## [235] "NSFTV343.b663b3f8.0" "NSFTV271.b67fd5f6.0" "NSFTV122.b6dc1bcc.0"
## [238] "NSFTV316.b828b757.0" "NSFTV317.b956dfb5.0" "NSFTV288.bc6b0af5.0"
## [241] "NSFTV652.bc9c6f80.0" "NSFTV139.bd0d322b.0" "NSFTV370.bd7aaa87.0"
## [244] "NSFTV83.becfe767.0" "NSFTV188.bf18cbae.0" "NSFTV190.bf7afed8.0"
## [247] "NSFTV260.bfa16661.0" "NSFTV183.c08a5ea2.0" "NSFTV79.c0936cf1.0"
## [250] "NSFTV307.c0d6eca3.0" "NSFTV69.c14c4e03.0" "NSFTV373.c20dfc59.0"
## [253] "NSFTV392.c4a397e5.0" "NSFTV232.c528bce0.0" "NSFTV35.c59435a8.0"
## [256] "NSFTV220.c5bf98cc.0" "NSFTV319.c7aeb39f.0" "NSFTV263.c7d2513e.0"
## [259] "NSFTV359.c86f37d0.0" "NSFTV200.c8a73fe7.0" "NSFTV185.c97be7ae.0"
## [262] "NSFTV76.cb5e38e6.0" "NSFTV257.cb9c18df.0" "NSFTV18.cbd6b346.0"
## [265] "NSFTV357.cc0ef8cf.0" "NSFTV202.ccdc6d39.0" "NSFTV261.ceb798a3.0"
## [268] "NSFTV320.cf44b628.0" "NSFTV196.cff871e8.0" "NSFTV325.d0392fe8.0"
## [271] "NSFTV131.d09d62e7.0" "NSFTV19.d22c8e33.0" "NSFTV31.d2da857d.0"
## [274] "NSFTV331.d438bfed.0" "NSFTV207.d628ec3a.0" "NSFTV290.d864377c.0"
## [277] "NSFTV368.db737b9b.0" "NSFTV237.dc2cf39b.0" "NSFTV369.dd2bfbfb.0"
## [280] "NSFTV30.dd6e755e.0" "NSFTV376.de696d95.0" "NSFTV247.de7ac7bf.0"
## [283] "NSFTV151.dee26171.0" "NSFTV41.dfb13733.0" "NSFTV154.dfdfb828.0"
## [286] "NSFTV24.e074229f.0" "NSFTV246.e0815776.0" "NSFTV363.e32c9d62.0"
## [289] "NSFTV108.e3b049a9.0" "NSFTV152.e4533c8b.0" "NSFTV43.e5951598.0"
## [292] "NSFTV367.e59cbfbe.0" "NSFTV208.e876c9ec.0" "NSFTV372.e8f708a5.0"
## [295] "NSFTV314.ea28671d.0" "NSFTV114.eac19fb8.0" "NSFTV236.eb17cda6.0"
## [298] "NSFTV636.ebb47c09.0" "NSFTV378.eca85a73.0" "NSFTV148.ed1beb9e.0"
## [301] "NSFTV55.ef8fd965.0" "NSFTV70.efbb93de.0" "NSFTV16.f0328a18.0"
## [304] "NSFTV356.f22d2dc6.0" "NSFTV365.f2e723fc.0" "NSFTV224.f2e9bed5.0"
## [307] "NSFTV264.f2f12364.0" "NSFTV262.f30e146b.0" "NSFTV635.f3d9c979.0"
## [310] "NSFTV203.f46f03bf.0" "NSFTV644.f66e275e.0" "NSFTV223.f73fa9e9.0"
## [313] "NSFTV266.f7da2506.0" "NSFTV180.f948d7b5.0" "NSFTV336.fa180a1e.0"
## [316] "NSFTV21.fa4c4111.0" "NSFTV327.fb2b3d82.0" "NSFTV310.fbae20bd.0"
## [319] "NSFTV276.fc26ce23.0" "NSFTV192.fdd98970.0" "NSFTV259.fe70ec33.0"
## [322] "NSFTV338.fed0b9a3.0" "NSFTV215.fed1dbae.0" "NSFTV395.ffde60f9.0"
## [325] "#N/A"
head(LA_GT)
# select only relevant SNP information
col.yes <- which(colnames(LA_GT) %in% pheno_id)
only_relev <- LA_GT[,col.yes]
head(only_relev)
dim(only_relev)
## [1] 5 324
Then - let’s add chromosome and position information
only_relev$Chr <- LA_GT$CHROM
only_relev$Pos <- LA_GT$POS
dim(only_relev)
## [1] 5 326
only_relev
LA_fin <- only_relev[,c(325:326,1:324)]
head(LA_fin)
write.csv(LA_fin, "Locus_A_final_file.csv", row.names = F)
So finally - once we want to run a Haplotype analysis for an individual gene - we would do it like this:
e.g. Locus 1 gene ID Os01t0225200 - as we cannot do anything with the NA SNPs - we need to remove accessions which miss the selected SNPs or they are bi-allelic
ttemp2 <- as.data.frame(LA_fin)
head(ttemp2)
# let's not select 1st two collumns:
ttemp3 <- ttemp2[,c(3:ncol(ttemp2))]
ttemp3
# now - let's transpose the shabbangle:
temp4 <- as.data.frame(t(ttemp3))
temp4
for(i in 1:nrow(temp4)){
temp4[i,] <- gsub("NA", NA, temp4[i,])
}
# get the accessions with NAs out of the data frame
nona_temp <- na.omit(temp4)
dim(nona_temp)
## [1] 263 5
head(nona_temp)
Now is the time to make a string for each genotype of the SNPs in the gene:
haplotype <- nona_temp %>%
unite("haplotype", V1:V5)
haplotype
haplotype$haplotype <- gsub("_", "", haplotype$haplotype)
haplotype$VCF_better <- rownames(haplotype)
haplotype
Finally - let’s fuse it into one file with phenotypes
colnames(pheno2)
## [1] "HDRA.ID" "NSFTV.ID" "IRGC.ID"
## [4] "Subpop" "Accession.Name" "VCF_code"
## [7] "VCF_better" "index.pots" "Variety"
## [10] "Shoot.area..cm2." "Hull.area..cm2." "Solidity"
## [13] "Perimeter..cm." "Nr.leaves" "Nr.tillers"
## [16] "Plant.height..cm." "Culm.height..cm." "Leaf.length..cm."
## [19] "DW..g." "Leaf.angle..º." "Tiller.angle..º."
## [22] "Erectness..º." "Droopiness..º." "SUM_rank_traits"
## [25] "SUM_norm_traits" "norm_SI_rank" "SI_rank"
pheno_temp <- pheno2[,c(7,9:27)]
haplotype2 <- as.data.frame(haplotype)
ultimate <- merge(pheno_temp, haplotype2, by=c("VCF_better"))
head(ultimate)
Now - let’s check if we can make some graphs! :) Exciting :)
First - we need to examine which haplotypes are represented by most accessions and which by less than 3 accessions (they will be excluded):
head(ultimate)
unique(ultimate$haplotype)
## [1] "AAAAT" "ATAAT" "AAAAA" "AATAT" "ATTAT" "TTAAT" "ATAAA" "TAAAT"
# let's see which haplotype is most popular among accessions:
ultimate %>% group_by(haplotype) %>%
tally()
So - we have 1 unique haplotype: - AAAAAAAAAAAAAAAA - most popular - represnted by 84 accessions and
- TTAAT is represented by < 3 accessions - and ought to be excluded
So let’s exclude the haplotypes without proper representation:
too_few <- c("TTAAT")
ultimate2 <- subset(ultimate, !(ultimate$haplotype %in% too_few))
unique(ultimate2$haplotype)
## [1] "AAAAT" "ATAAT" "AAAAA" "AATAT" "ATTAT" "ATAAA" "TAAAT"
write.csv(ultimate, "LA_Haplotype_Os01t0225200_gene_and_pheno.csv", row.names = F)
ultimate2$haplotype <- gsub("AAAAT", "hap01", ultimate2$haplotype)
ultimate2$haplotype <- gsub("AATAT", "hap02", ultimate2$haplotype)
ultimate2$haplotype <- gsub("AAAAA", "hap03", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATAAT", "hap04", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATTAT", "hap05", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATAAA", "hap06", ultimate2$haplotype)
ultimate2$haplotype <- gsub("TAAAT", "hap07", ultimate2$haplotype)
ultimate2$haplotype <- factor(ultimate2$haplotype, levels = c("hap01", "hap02", "hap03", "hap04", "hap05", "hap06", "hap07"))
colnames(ultimate2)
## [1] "VCF_better" "Variety" "Shoot.area..cm2."
## [4] "Hull.area..cm2." "Solidity" "Perimeter..cm."
## [7] "Nr.leaves" "Nr.tillers" "Plant.height..cm."
## [10] "Culm.height..cm." "Leaf.length..cm." "DW..g."
## [13] "Leaf.angle..º." "Tiller.angle..º." "Erectness..º."
## [16] "Droopiness..º." "SUM_rank_traits" "SUM_norm_traits"
## [19] "norm_SI_rank" "SI_rank" "haplotype"
ultimate2$Leaf.angle..º. <- as.numeric(ultimate2$Leaf.angle..º.)
ultimate2$Tiller.angle..º. <- as.numeric(ultimate2$Tiller.angle..º.)
ultimate2$Erectness..º. <- as.numeric(ultimate2$Erectness..º.)
ultimate2$Droopiness..º. <- as.numeric(ultimate2$Droopiness..º.)
Now - we can start plotting and examining whether haplotype groups show significant differences - we will compare to the most abundant haplotype
Output <- TukeyHSD(aov(aov(Shoot.area..cm2. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "ac" "abc" "bc" "abc" "c"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Shoot.area..cm2., colour = haplotype))
#my_box_plot <- my_box_plot + geom_boxplot()
my_box_plot <- my_box_plot + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot <- my_box_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot <- my_box_plot + theme(legend.position = "none")
my_box_plot <- my_box_plot + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot <- my_box_plot + xlab("") + ylab(expression(paste("Shoot area (", cm^2, ")", sep = "")))
my_box_plot <- my_box_plot + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot <- my_box_plot + stat_pvalue_manual(test, label = "Tukey", y.position = 950)
my_box_plot
OK - let’s move on to other phenotypes:
Output <- TukeyHSD(aov(aov(Hull.area..cm2. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "ac" "abc" "bc" "abc" "ac"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot2 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Hull.area..cm2., colour = haplotype))
my_box_plot2 <- my_box_plot2 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot2 <- my_box_plot2 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot2 <- my_box_plot2 + theme(legend.position = "none")
my_box_plot2 <- my_box_plot2 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot2 <- my_box_plot2 + xlab("") + ylab(expression(paste("Hull area (", cm^2, ")", sep = "")))
my_box_plot2 <- my_box_plot2 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot2 <- my_box_plot2 + stat_pvalue_manual(test, label = "Tukey", y.position = 6000)
my_box_plot2
Output <- TukeyHSD(aov(aov(Solidity ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "ac" "abc" "abc" "abc" "bc"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot3 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Solidity, colour = haplotype))
my_box_plot3 <- my_box_plot3 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot3 <- my_box_plot3 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot3 <- my_box_plot3 + theme(legend.position = "none")
my_box_plot3 <- my_box_plot3 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot3 <- my_box_plot3 + xlab("") + ylab("Solidity (a.u.)")
my_box_plot3 <- my_box_plot3 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot3 <- my_box_plot3 + stat_pvalue_manual(test, label = "Tukey", y.position = 0.21)
my_box_plot3
Output <- TukeyHSD(aov(aov(Perimeter..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "a" "ab" "ab" "c" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot4 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Perimeter..cm., colour = haplotype))
my_box_plot4 <- my_box_plot4 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot4 <- my_box_plot4 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot4 <- my_box_plot4 + theme(legend.position = "none")
my_box_plot4 <- my_box_plot4 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot4 <- my_box_plot4 + xlab("") + ylab("Perimeter (cm)")
my_box_plot4 <- my_box_plot4 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot4 <- my_box_plot4 + stat_pvalue_manual(test, label = "Tukey", y.position = 2500)
my_box_plot4
Output <- TukeyHSD(aov(aov(Nr.leaves ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "ab" "c" "a" "abc" "abc" "abc" "bc"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot5 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Nr.leaves, colour = haplotype))
my_box_plot5 <- my_box_plot5 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot5 <- my_box_plot5 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot5 <- my_box_plot5 + theme(legend.position = "none")
my_box_plot5 <- my_box_plot5 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot5 <- my_box_plot5 + xlab("") + ylab("Leaf number")
my_box_plot5 <- my_box_plot5 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot5 <- my_box_plot5 + stat_pvalue_manual(test, label = "Tukey", y.position = 55)
my_box_plot5
Output <- TukeyHSD(aov(aov(Nr.tillers ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "a" "abc" "abc" "ac" "bc"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot6 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Nr.tillers, colour = haplotype))
my_box_plot6 <- my_box_plot6 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot6 <- my_box_plot6 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot6 <- my_box_plot6 + theme(legend.position = "none")
my_box_plot6 <- my_box_plot6 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot6 <- my_box_plot6 + xlab("") + ylab("Tiller number")
my_box_plot6 <- my_box_plot6 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot6 <- my_box_plot6 + stat_pvalue_manual(test, label = "Tukey", y.position = 17)
my_box_plot6
Output <- TukeyHSD(aov(aov(Plant.height..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "ab" "ab" "ab" "ab" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot7 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Plant.height..cm., colour = haplotype))
my_box_plot7 <- my_box_plot7 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot7 <- my_box_plot7 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot7 <- my_box_plot7 + theme(legend.position = "none")
my_box_plot7 <- my_box_plot7 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot7 <- my_box_plot7 + xlab("") + ylab("Plant height (cm)")
my_box_plot7 <- my_box_plot7 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot7 <- my_box_plot7 + stat_pvalue_manual(test, label = "Tukey", y.position = 95)
my_box_plot7
Output <- TukeyHSD(aov(aov(Culm.height..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "ab" "a" "a" "ab" "ab" "ab" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot8 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Culm.height..cm., colour = haplotype))
my_box_plot8 <- my_box_plot8 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot8 <- my_box_plot8 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot8 <- my_box_plot8 + theme(legend.position = "none")
my_box_plot8 <- my_box_plot8 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot8 <- my_box_plot8 + xlab("") + ylab("Culm height (cm)")
my_box_plot8 <- my_box_plot8 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot8 <- my_box_plot8 + stat_pvalue_manual(test, label = "Tukey", y.position = 35)
my_box_plot8
Output <- TukeyHSD(aov(aov(Leaf.length..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "ab" "ab" "b" "ab" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot9 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Leaf.length..cm., colour = haplotype))
my_box_plot9 <- my_box_plot9 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot9 <- my_box_plot9 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot9 <- my_box_plot9 + theme(legend.position = "none")
my_box_plot9 <- my_box_plot9 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot9 <- my_box_plot9 + xlab("") + ylab("Leaf length (cm)")
my_box_plot9 <- my_box_plot9 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot9 <- my_box_plot9 + stat_pvalue_manual(test, label = "Tukey", y.position = 65)
my_box_plot9
Output <- TukeyHSD(aov(aov(DW..g. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "a" "ab" "ab" "a" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot10 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = DW..g., colour = haplotype))
my_box_plot10 <- my_box_plot10 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot10 <- my_box_plot10 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot10 <- my_box_plot10 + theme(legend.position = "none")
my_box_plot10 <- my_box_plot10 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot10 <- my_box_plot10 + xlab("") + ylab("Dry weight (g)")
my_box_plot10 <- my_box_plot10 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot10 <- my_box_plot10 + stat_pvalue_manual(test, label = "Tukey", y.position = 4)
my_box_plot10
Output <- TukeyHSD(aov(aov(Leaf.angle..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "a" "a" "a" "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap04 TRUE
## hap05 TRUE
## hap06 TRUE
## hap07 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot11 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Leaf.angle..º., colour = haplotype))
my_box_plot11 <- my_box_plot11 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot11 <- my_box_plot11 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot11 <- my_box_plot11 + theme(legend.position = "none")
my_box_plot11 <- my_box_plot11 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot11 <- my_box_plot11 + xlab("") + ylab("Leaf angle (º)")
my_box_plot11 <- my_box_plot11 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot11 <- my_box_plot11 + stat_pvalue_manual(test, label = "Tukey", y.position = 65)
my_box_plot11
Output <- TukeyHSD(aov(aov(Tiller.angle..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "ab" "a" "b" "ab" "ab" "ab" "ab"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot12 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Tiller.angle..º., colour = haplotype))
my_box_plot12 <- my_box_plot12 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot12 <- my_box_plot12 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot12 <- my_box_plot12 + theme(legend.position = "none")
my_box_plot12 <- my_box_plot12 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot12 <- my_box_plot12 + xlab("") + ylab("Tiller angle (º)")
my_box_plot12 <- my_box_plot12 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot12 <- my_box_plot12 + stat_pvalue_manual(test, label = "Tukey", y.position = 37)
my_box_plot12
Output <- TukeyHSD(aov(aov(Erectness..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "a" "ab" "b" "ab" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot13 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Erectness..º., colour = haplotype))
my_box_plot13 <- my_box_plot13 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot13 <- my_box_plot13 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot13 <- my_box_plot13 + theme(legend.position = "none")
my_box_plot13 <- my_box_plot13 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot13 <- my_box_plot13 + xlab("") + ylab("Erectness (º)")
my_box_plot13 <- my_box_plot13 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot13 <- my_box_plot13 + stat_pvalue_manual(test, label = "Tukey", y.position = 165)
my_box_plot13
Output <- TukeyHSD(aov(aov(Droopiness..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap01
## "a" "b" "a" "ab" "b" "ab" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot14 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Droopiness..º., colour = haplotype))
my_box_plot14 <- my_box_plot14 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot14 <- my_box_plot14 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot14 <- my_box_plot14 + theme(legend.position = "none")
my_box_plot14 <- my_box_plot14 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot14 <- my_box_plot14 + xlab("") + ylab("Droopiness (º)")
my_box_plot14 <- my_box_plot14 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot14 <- my_box_plot14 + stat_pvalue_manual(test, label = "Tukey", y.position = 170)
my_box_plot14
Let’s combine all of these figures into one pdf:
pdf("Locus_A_Haplotypes_Os01g0225200-01.pdf", height = 30, width = 12)
plot_grid(my_box_plot, my_box_plot2, my_box_plot3, my_box_plot4, my_box_plot5, my_box_plot6, my_box_plot7, my_box_plot8,
my_box_plot9, my_box_plot10, my_box_plot11, my_box_plot12, my_box_plot13, my_box_plot14,
ncol=2,
align = "hv", labels=c("AUTO"),
label_size = 24)
dev.off()
## quartz_off_screen
## 2
Locus_D <- read.table("New_LD.GT.FORMAT", header=T)
LD_GT <- Locus_D
head(LD_GT)
then - we need to translate the 0/0 and 1/1 into A and T - as all the SNPs in here should be homozygous and bi-allelic
sapply(LD_GT$IRGC121316.c88dcbba.0, class)
## 0/0 0/0 0/0 0/0 0/0 ./.
## "character" "character" "character" "character" "character" "character"
## 0/0 0/0 1/1 0/0 0/0 0/0
## "character" "character" "character" "character" "character" "character"
## 0/0 0/0 ./. 0/0 ./. 0/0
## "character" "character" "character" "character" "character" "character"
## 0/0 0/0 0/0 ./. 0/0 0/0
## "character" "character" "character" "character" "character" "character"
## 0/0 0/0 0/0 0/0 0/0 0/0
## "character" "character" "character" "character" "character" "character"
## 1/1 0/0 0/0 0/0 0/0
## "character" "character" "character" "character" "character"
head(LD_GT)
unique(LD_GT$IRGC121491)
## [1] "0/0" "./." "1/1"
# current order is
# ./. 0/0 0/1 1/1
# and we need to change it into:
# "NA", "A", "N.A", "T" * mainly because we are not able to do haplotypes on biallelic SNPs - therefore they get classified as "NAs" too
# also - we cannot have two values converging into "NA", so we add "N.A", which we subsequently replace by "NA" in the last line
# therefore we loop it for all collumns:
for(e in 3:ncol(LD_GT)){
LD_GT[,e] <- gsub("0/0", "A", LD_GT[,e])
LD_GT[,e] <- gsub("1/1", "T", LD_GT[,e])
LD_GT[,e] <- gsub("./.", "NA", LD_GT[,e])
}
head(LD_GT)
All looks good! Now - we need to subset the SNP information based on the IDs present in the phenotype file:
pheno_id <- pheno2$VCF_better
unique(pheno_id)
## [1] "NSFTV81.006dfe9b.0" "NSFTV39.014ab0f1.0" "NSFTV132.02cc7c6d.0"
## [4] "NSFTV286.044639de.0" "NSFTV643.0491646d.0" "NSFTV32.068b860d.0"
## [7] "NSFTV358.0764178c.0" "NSFTV275.07dac217.0" "NSFTV269.092f15e7.0"
## [10] "NSFTV256.0be20a08.0" "NSFTV6.0d125c0e.0" "NSFTV324.0defa551.0"
## [13] "NSFTV287.0e787735.0" "NSFTV126.0f6a67da.0" "NSFTV110.11bf5114.0"
## [16] "NSFTV355.122a975b.0" "NSFTV150.128dd425.0" "NSFTV72.12d82364.0"
## [19] "NSFTV311.137628a5.0" "NSFTV235.13ae4f27.0" "NSFTV216.149fb038.0"
## [22] "NSFTV364.14e43b02.0" "NSFTV323.1510e4aa.0" "NSFTV277.1516d75f.0"
## [25] "NSFTV177.15e6c437.0" "NSFTV105.16463092.0" "NSFTV278.17627a92.0"
## [28] "NSFTV145.17c4070a.0" "NSFTV391.180a155f.0" "NSFTV253.1911c363.0"
## [31] "NSFTV163.192e24ab.0" "NSFTV228.195567bf.0" "NSFTV27.1956dd3f.0"
## [34] "NSFTV219.199f4455.0" "NSFTV138.1a946dc6.0" "NSFTV133.1a95985b.0"
## [37] "NSFTV65.1ab838a2.0" "NSFTV25.1ce7093b.0" "NSFTV335.1e26852c.0"
## [40] "NSFTV328.1e4f3933.0" "NSFTV396.1eb5d579.0" "NSFTV100.1f10be3d.0"
## [43] "NSFTV321.1f6f13c3.0" "NSFTV394.1f856ac1.0" "NSFTV94.20a4c97d.0"
## [46] "NSFTV49.21d3f1b3.0" "NSFTV333.22e60af9.0" "NSFTV380.238f25f1.0"
## [49] "NSFTV36.2593e5a7.0" "NSFTV201.263f2baf.0" "NSFTV377.2779bba9.0"
## [52] "NSFTV121.280279b3.0" "NSFTV241.2a7724c1.0" "NSFTV9.2a9d8d47.0"
## [55] "NSFTV162.2b28e441.0" "NSFTV33.2b4e06fe.0" "NSFTV306.2c583eed.0"
## [58] "NSFTV157.2db62b89.0" "NSFTV10.2e1c9c87.0" "NSFTV29.30646147.0"
## [61] "NSFTV8.30c3073f.0" "NSFTV221.311aaf30.0" "NSFTV144.3269952e.0"
## [64] "NSFTV85.328163e1.0" "NSFTV26.32a15c4d.0" "NSFTV96.32a6808e.0"
## [67] "NSFTV234.32ca1759.0" "NSFTV205.348853a5.0" "NSFTV214.34df4fca.0"
## [70] "NSFTV206.37249a74.0" "NSFTV67.39dd7feb.0" "NSFTV119.3aa51818.0"
## [73] "NSFTV385.3b25c24f.0" "NSFTV195.3c10a4b8.0" "NSFTV329.3e58f34c.0"
## [76] "NSFTV143.3ea144c5.0" "NSFTV318.3eb8fab5.0" "NSFTV255.3fb62843.0"
## [79] "NSFTV153.40f343f4.0" "NSFTV651.432387a8.0" "NSFTV17.446f6c62.0"
## [82] "NSFTV243.44aeff79.0" "NSFTV186.4559ff3f.0" "NSFTV397.45d3c920.0"
## [85] "NSFTV77.45ec7861.0" "NSFTV178.480f7505.0" "NSFTV147.48b4f132.0"
## [88] "NSFTV639.48d3bc66.0" "NSFTV273.493f2b7e.0" "NSFTV231.49649a00.0"
## [91] "NSFTV340.4b0a7350.0" "NSFTV345.4e35b58a.0" "NSFTV386.4f4f777a.0"
## [94] "NSFTV161.4f643bf5.0" "NSFTV239.523d5ec4.0" "NSFTV22.52c6e2ba.0"
## [97] "NSFTV112.531e23fa.0" "NSFTV226.536afc14.0" "NSFTV209.5384be83.0"
## [100] "NSFTV265.544f48c0.0" "NSFTV211.5497b233.0" "NSFTV179.5505a767.0"
## [103] "NSFTV5.5533f406.0" "NSFTV74.55d3afae.0" "NSFTV193.5632be21.0"
## [106] "NSFTV252.56f5103e.0" "NSFTV51.57d7feea.0" "NSFTV280.58d5bdc3.0"
## [109] "NSFTV308.593def74.0" "NSFTV282.5aa7968d.0" "NSFTV117.5b144b9c.0"
## [112] "NSFTV390.5c592759.0" "NSFTV92.5df8f871.0" "NSFTV3.5ef1be74.0"
## [115] "NSFTV244.5efe8622.0" "NSFTV258.60c362f1.0" "NSFTV187.60e142c3.0"
## [118] "NSFTV166.61152244.0" "NSFTV53.61b7bf53.0" "NSFTV71.61cc3858.0"
## [121] "NSFTV54.06334433.0" "NSFTV125.63f298ba.0" "NSFTV279.642238d9.0"
## [124] "NSFTV87.64fa0112.0" "NSFTV158.6516a500.0" "NSFTV56.652530bb.0"
## [127] "NSFTV13.660f0236.0" "NSFTV189.661eaeaa.0" "NSFTV78.66d97672.0"
## [130] "NSFTV270.6815ec6d.0" "NSFTV46.68c2ecf8.0" "NSFTV37.69527f35.0"
## [133] "NSFTV98.6ab77e3e.0" "NSFTV20.6af7c9fc.0" "NSFTV283.6b37cb3d.0"
## [136] "NSFTV116.6cedf6aa.0" "NSFTV102.6e26f4cc.0" "NSFTV360.6f068769.0"
## [139] "NSFTV240.6f263bd1.0" "NSFTV184.7119ebee.0" "NSFTV389.7126c359.0"
## [142] "NSFTV123.714ac141.0" "NSFTV118.71bd9426.0" "NSFTV57.71d5fcf4.0"
## [145] "NSFTV164.72497775.0" "NSFTV322.72cef953.0" "NSFTV366.73b20824.0"
## [148] "NSFTV303.73c4fdfb.0" "NSFTV90.743f41c1.0" "NSFTV315.74c9fbbc.0"
## [151] "NSFTV66.7691a376.0" "NSFTV128.76a1efc9.0" "NSFTV40.76f56677.0"
## [154] "NSFTV304.773e969e.0" "NSFTV23.7741d7c1.0" "NSFTV204.778cea6e.0"
## [157] "NSFTV113.7a723d9e.0" "NSFTV107.7b7a0d82.0" "NSFTV84.7bee5b9f.0"
## [160] "NSFTV222.7cfe4a0c.0" "NSFTV44.7d4c6c4e.0" "NSFTV274.7dba928e.0"
## [163] "NSFTV332.7e956f22.0" "NSFTV99.7eb7c6a8.0" "NSFTV60.7f7fb805.0"
## [166] "NSFTV289.7fc911f7.0" "NSFTV142.806c51cc.0" "NSFTV346.80fc89ae.0"
## [169] "NSFTV198.818143d8.0" "NSFTV4.81d03b86.0" "NSFTV305.8288d278.0"
## [172] "NSFTV371.84ad8457.0" "NSFTV347.853e318c.0" "NSFTV140.85551f9c.0"
## [175] "NSFTV281.85959164.0" "NSFTV218.85da3a70.0" "NSFTV104.8629f76c.0"
## [178] "NSFTV137.8653bbdb.0" "NSFTV301.86556239.0" "NSFTV313.872ec0b4.0"
## [181] "NSFTV249.87621330.0" "NSFTV641.88631879.0" "NSFTV250.88e1f162.0"
## [184] "NSFTV89.897c5eef.0" "NSFTV106.8a06320f.0" "NSFTV245.8a41a10a.0"
## [187] "NSFTV350.8adbe877.0" "NSFTV103.8c76404c.0" "NSFTV181.8cd35626.0"
## [190] "NSFTV176.8cfa8fb2.0" "NSFTV45.8d6aded4.0" "NSFTV120.8e6220e5.0"
## [193] "NSFTV167.8e941960.0" "NSFTV129.8fafd383.0" "NSFTV155.9127f236.0"
## [196] "NSFTV217.915c0033.0" "NSFTV156.93387d96.0" "NSFTV199.958ed737.0"
## [199] "NSFTV384.9764b3c8.0" "NSFTV191.97ce6839.0" "NSFTV73.97dcf87d.0"
## [202] "NSFTV334.09869491.0" "NSFTV642.986a3c37.0" "NSFTV172.9ac72c9e.0"
## [205] "NSFTV165.9b2223bb.0" "NSFTV248.9b37187d.0" "NSFTV285.9baa653c.0"
## [208] "NSFTV58.9c099b78.0" "NSFTV93.9f1f4614.0" "NSFTV101.9f782e9e.0"
## [211] "NSFTV341.9f786a86.0" "NSFTV387.a08839ce.0" "NSFTV344.a0f35768.0"
## [214] "NSFTV225.a31b4a06.0" "NSFTV284.a32e0586.0" "NSFTV348.a446becd.0"
## [217] "NSFTV349.a5a6e2dd.0" "NSFTV59.a5ec1b10.0" "NSFTV130.a796716d.0"
## [220] "NSFTV227.a7983b03.0" "NSFTV337.a7bec464.0" "NSFTV141.a8319fc6.0"
## [223] "NSFTV342.ac7a352c.0" "NSFTV267.aceb3352.0" "NSFTV638.ad1a9c05.0"
## [226] "NSFTV326.aeed4684.0" "NSFTV353.af77442b.0" "NSFTV75.b0816082.0"
## [229] "NSFTV242.b21f7528.0" "NSFTV213.b2320902.0" "NSFTV171.b238d197.0"
## [232] "NSFTV291.b29d3bf4.0" "NSFTV339.b3a301ea.0" "NSFTV268.b49ad0db.0"
## [235] "NSFTV343.b663b3f8.0" "NSFTV271.b67fd5f6.0" "NSFTV122.b6dc1bcc.0"
## [238] "NSFTV316.b828b757.0" "NSFTV317.b956dfb5.0" "NSFTV288.bc6b0af5.0"
## [241] "NSFTV652.bc9c6f80.0" "NSFTV139.bd0d322b.0" "NSFTV370.bd7aaa87.0"
## [244] "NSFTV83.becfe767.0" "NSFTV188.bf18cbae.0" "NSFTV190.bf7afed8.0"
## [247] "NSFTV260.bfa16661.0" "NSFTV183.c08a5ea2.0" "NSFTV79.c0936cf1.0"
## [250] "NSFTV307.c0d6eca3.0" "NSFTV69.c14c4e03.0" "NSFTV373.c20dfc59.0"
## [253] "NSFTV392.c4a397e5.0" "NSFTV232.c528bce0.0" "NSFTV35.c59435a8.0"
## [256] "NSFTV220.c5bf98cc.0" "NSFTV319.c7aeb39f.0" "NSFTV263.c7d2513e.0"
## [259] "NSFTV359.c86f37d0.0" "NSFTV200.c8a73fe7.0" "NSFTV185.c97be7ae.0"
## [262] "NSFTV76.cb5e38e6.0" "NSFTV257.cb9c18df.0" "NSFTV18.cbd6b346.0"
## [265] "NSFTV357.cc0ef8cf.0" "NSFTV202.ccdc6d39.0" "NSFTV261.ceb798a3.0"
## [268] "NSFTV320.cf44b628.0" "NSFTV196.cff871e8.0" "NSFTV325.d0392fe8.0"
## [271] "NSFTV131.d09d62e7.0" "NSFTV19.d22c8e33.0" "NSFTV31.d2da857d.0"
## [274] "NSFTV331.d438bfed.0" "NSFTV207.d628ec3a.0" "NSFTV290.d864377c.0"
## [277] "NSFTV368.db737b9b.0" "NSFTV237.dc2cf39b.0" "NSFTV369.dd2bfbfb.0"
## [280] "NSFTV30.dd6e755e.0" "NSFTV376.de696d95.0" "NSFTV247.de7ac7bf.0"
## [283] "NSFTV151.dee26171.0" "NSFTV41.dfb13733.0" "NSFTV154.dfdfb828.0"
## [286] "NSFTV24.e074229f.0" "NSFTV246.e0815776.0" "NSFTV363.e32c9d62.0"
## [289] "NSFTV108.e3b049a9.0" "NSFTV152.e4533c8b.0" "NSFTV43.e5951598.0"
## [292] "NSFTV367.e59cbfbe.0" "NSFTV208.e876c9ec.0" "NSFTV372.e8f708a5.0"
## [295] "NSFTV314.ea28671d.0" "NSFTV114.eac19fb8.0" "NSFTV236.eb17cda6.0"
## [298] "NSFTV636.ebb47c09.0" "NSFTV378.eca85a73.0" "NSFTV148.ed1beb9e.0"
## [301] "NSFTV55.ef8fd965.0" "NSFTV70.efbb93de.0" "NSFTV16.f0328a18.0"
## [304] "NSFTV356.f22d2dc6.0" "NSFTV365.f2e723fc.0" "NSFTV224.f2e9bed5.0"
## [307] "NSFTV264.f2f12364.0" "NSFTV262.f30e146b.0" "NSFTV635.f3d9c979.0"
## [310] "NSFTV203.f46f03bf.0" "NSFTV644.f66e275e.0" "NSFTV223.f73fa9e9.0"
## [313] "NSFTV266.f7da2506.0" "NSFTV180.f948d7b5.0" "NSFTV336.fa180a1e.0"
## [316] "NSFTV21.fa4c4111.0" "NSFTV327.fb2b3d82.0" "NSFTV310.fbae20bd.0"
## [319] "NSFTV276.fc26ce23.0" "NSFTV192.fdd98970.0" "NSFTV259.fe70ec33.0"
## [322] "NSFTV338.fed0b9a3.0" "NSFTV215.fed1dbae.0" "NSFTV395.ffde60f9.0"
## [325] "#N/A"
head(LD_GT)
# select only relevant SNP information
col.yes <- which(colnames(LD_GT) %in% pheno_id)
only_relev <- LD_GT[,col.yes]
head(only_relev)
dim(only_relev)
## [1] 35 324
Then - let’s add chromosome and position information
only_relev$Chr <- LD_GT$CHROM
only_relev$Pos <- LD_GT$POS
dim(only_relev)
## [1] 35 326
only_relev
LD_fin <- only_relev[,c(325:326,1:324)]
head(LD_fin)
write.csv(LD_fin, "Locus_D_final_file.csv", row.names = F)
So finally - once we want to run a Haplotype analysis for an individual gene - we would do it like this:
e.g. Locus 1 gene ID Os01t0810800 - as we cannot do anything with the NA SNPs - we need to remove accessions which miss the selected SNPs or they are bi-allelic
ttemp2 <- as.data.frame(LD_fin)
head(ttemp2)
# let's not select 1st two collumns:
ttemp3 <- ttemp2[,c(3:ncol(ttemp2))]
ttemp3
# now - let's transpose the shabbangle:
temp4 <- as.data.frame(t(ttemp3))
temp4
for(i in 1:nrow(temp4)){
temp4[i,] <- gsub("NA", NA, temp4[i,])
}
# get the accessions with NAs out of the data frame
nona_temp <- na.omit(temp4)
dim(nona_temp)
## [1] 56 35
head(nona_temp)
Now is the time to make a string for each genotype of the SNPs in the gene:
haplotype <- nona_temp %>%
unite("haplotype", V1:V35)
haplotype
haplotype$haplotype <- gsub("_", "", haplotype$haplotype)
haplotype$VCF_better <- rownames(haplotype)
haplotype
Finally - let’s fuse it into one file with phenotypes
colnames(pheno2)
## [1] "HDRA.ID" "NSFTV.ID" "IRGC.ID"
## [4] "Subpop" "Accession.Name" "VCF_code"
## [7] "VCF_better" "index.pots" "Variety"
## [10] "Shoot.area..cm2." "Hull.area..cm2." "Solidity"
## [13] "Perimeter..cm." "Nr.leaves" "Nr.tillers"
## [16] "Plant.height..cm." "Culm.height..cm." "Leaf.length..cm."
## [19] "DW..g." "Leaf.angle..º." "Tiller.angle..º."
## [22] "Erectness..º." "Droopiness..º." "SUM_rank_traits"
## [25] "SUM_norm_traits" "norm_SI_rank" "SI_rank"
pheno_temp <- pheno2[,c(7,9:27)]
haplotype2 <- as.data.frame(haplotype)
ultimate <- merge(pheno_temp, haplotype2, by=c("VCF_better"))
head(ultimate)
Now - let’s check if we can make some graphs! :) Exciting :)
First - we need to examine which haplotypes are represented by most accessions and which by less than 3 accessions (they will be excluded):
head(ultimate)
unique(ultimate$haplotype)
## [1] "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAATAAAA"
## [3] "AAAATTAAAATATATAATATAATATTAAAATAAAT" "AAAATTAAAATATAAAATATAATATTAAAATAAAT"
## [5] "AAAAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAA" "AAAAAAAAAAAAAATAAAAAAAAAAAAAAATTAAT"
## [7] "AAAATTATAATATATAATATAATATTAAAATAAAT" "AAAAAAATAAAAAATAAAAAAAAAAAAAAAAAAAA"
## [9] "AAAAAAAATAATAAAATAAAAAAAAAAAAAAAAAA" "AAAAAAATAAAAAAAAAAAAAAAAAAAAAAAAAAA"
## [11] "AAAAAAATAAAAAAAAAAAAAAAAAAAAAATAAAA" "AATAATTATTTATATAATAAAATATTTTATTTAAT"
## [13] "TAAATTAAAATATATAATATAATATTAAAATAAAT"
# let's see which haplotype is most popular among accessions:
ultimate %>% group_by(haplotype) %>%
tally()
So - we have MANY unique haplotypes and only three haplotypes represented by three or more accessions: - AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA - most popular
So let’s include the haplotypes represented by at least three accessions:
want_that <- c("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "AAAAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAA", "AAAATTAAAATATATAATATAATATTAAAATAAAT")
ultimate2 <- subset(ultimate, (ultimate$haplotype %in% want_that))
unique(ultimate2$haplotype)
## [1] "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" "AAAATTAAAATATATAATATAATATTAAAATAAAT"
## [3] "AAAAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAA"
write.csv(ultimate, "LD_Haplotype_Os01t0810800_gene_and_pheno.csv", row.names = F)
ultimate2$haplotype <- gsub("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "hap01", ultimate2$haplotype)
ultimate2$haplotype <- gsub("AAAATTAAAATATATAATATAATATTAAAATAAAT", "hap02", ultimate2$haplotype)
ultimate2$haplotype <- gsub("AAAAAAAAAAATAAAAAAAAAAAAAAAAAAAAAAA", "hap03", ultimate2$haplotype)
ultimate2$haplotype <- factor(ultimate2$haplotype, levels = c("hap01", "hap02", "hap03"))
colnames(ultimate2)
## [1] "VCF_better" "Variety" "Shoot.area..cm2."
## [4] "Hull.area..cm2." "Solidity" "Perimeter..cm."
## [7] "Nr.leaves" "Nr.tillers" "Plant.height..cm."
## [10] "Culm.height..cm." "Leaf.length..cm." "DW..g."
## [13] "Leaf.angle..º." "Tiller.angle..º." "Erectness..º."
## [16] "Droopiness..º." "SUM_rank_traits" "SUM_norm_traits"
## [19] "norm_SI_rank" "SI_rank" "haplotype"
ultimate2$Leaf.angle..º. <- as.numeric(ultimate2$Leaf.angle..º.)
ultimate2$Tiller.angle..º. <- as.numeric(ultimate2$Tiller.angle..º.)
ultimate2$Erectness..º. <- as.numeric(ultimate2$Erectness..º.)
ultimate2$Droopiness..º. <- as.numeric(ultimate2$Droopiness..º.)
Now - we can start plotting and examining whether haplotype groups show significant differences - we will compare to the most abundant haplotype
Output <- TukeyHSD(aov(aov(Shoot.area..cm2. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "b" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Shoot.area..cm2., colour = haplotype))
#my_box_plot <- my_box_plot + geom_boxplot()
my_box_plot <- my_box_plot + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot <- my_box_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot <- my_box_plot + theme(legend.position = "none")
my_box_plot <- my_box_plot + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot <- my_box_plot + xlab("") + ylab(expression(paste("Shoot area (", cm^2, ")", sep = "")))
my_box_plot <- my_box_plot + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot <- my_box_plot + stat_pvalue_manual(test, label = "Tukey", y.position = 950)
my_box_plot
OK - let’s move on to other phenotypes:
Output <- TukeyHSD(aov(aov(Hull.area..cm2. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "b" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot2 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Hull.area..cm2., colour = haplotype))
my_box_plot2 <- my_box_plot2 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot2 <- my_box_plot2 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot2 <- my_box_plot2 + theme(legend.position = "none")
my_box_plot2 <- my_box_plot2 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot2 <- my_box_plot2 + xlab("") + ylab(expression(paste("Hull area (", cm^2, ")", sep = "")))
my_box_plot2 <- my_box_plot2 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot2 <- my_box_plot2 + stat_pvalue_manual(test, label = "Tukey", y.position = 6000)
my_box_plot2
Output <- TukeyHSD(aov(aov(Solidity ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot3 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Solidity, colour = haplotype))
my_box_plot3 <- my_box_plot3 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot3 <- my_box_plot3 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot3 <- my_box_plot3 + theme(legend.position = "none")
my_box_plot3 <- my_box_plot3 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot3 <- my_box_plot3 + xlab("") + ylab("Solidity (a.u.)")
my_box_plot3 <- my_box_plot3 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot3 <- my_box_plot3 + stat_pvalue_manual(test, label = "Tukey", y.position = 0.21)
my_box_plot3
Output <- TukeyHSD(aov(aov(Perimeter..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "b" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot4 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Perimeter..cm., colour = haplotype))
my_box_plot4 <- my_box_plot4 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot4 <- my_box_plot4 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot4 <- my_box_plot4 + theme(legend.position = "none")
my_box_plot4 <- my_box_plot4 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot4 <- my_box_plot4 + xlab("") + ylab("Perimeter (cm)")
my_box_plot4 <- my_box_plot4 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot4 <- my_box_plot4 + stat_pvalue_manual(test, label = "Tukey", y.position = 2500)
my_box_plot4
Output <- TukeyHSD(aov(aov(Nr.leaves ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "b" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot5 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Nr.leaves, colour = haplotype))
my_box_plot5 <- my_box_plot5 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot5 <- my_box_plot5 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot5 <- my_box_plot5 + theme(legend.position = "none")
my_box_plot5 <- my_box_plot5 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot5 <- my_box_plot5 + xlab("") + ylab("Leaf number")
my_box_plot5 <- my_box_plot5 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot5 <- my_box_plot5 + stat_pvalue_manual(test, label = "Tukey", y.position = 55)
my_box_plot5
Output <- TukeyHSD(aov(aov(Nr.tillers ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "b" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot6 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Nr.tillers, colour = haplotype))
my_box_plot6 <- my_box_plot6 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot6 <- my_box_plot6 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot6 <- my_box_plot6 + theme(legend.position = "none")
my_box_plot6 <- my_box_plot6 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot6 <- my_box_plot6 + xlab("") + ylab("Tiller number")
my_box_plot6 <- my_box_plot6 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot6 <- my_box_plot6 + stat_pvalue_manual(test, label = "Tukey", y.position = 17)
my_box_plot6
Output <- TukeyHSD(aov(aov(Plant.height..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot7 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Plant.height..cm., colour = haplotype))
my_box_plot7 <- my_box_plot7 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot7 <- my_box_plot7 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot7 <- my_box_plot7 + theme(legend.position = "none")
my_box_plot7 <- my_box_plot7 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot7 <- my_box_plot7 + xlab("") + ylab("Plant height (cm)")
my_box_plot7 <- my_box_plot7 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot7 <- my_box_plot7 + stat_pvalue_manual(test, label = "Tukey", y.position = 95)
my_box_plot7
Output <- TukeyHSD(aov(aov(Culm.height..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot8 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Culm.height..cm., colour = haplotype))
my_box_plot8 <- my_box_plot8 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot8 <- my_box_plot8 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot8 <- my_box_plot8 + theme(legend.position = "none")
my_box_plot8 <- my_box_plot8 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot8 <- my_box_plot8 + xlab("") + ylab("Culm height (cm)")
my_box_plot8 <- my_box_plot8 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot8 <- my_box_plot8 + stat_pvalue_manual(test, label = "Tukey", y.position = 35)
my_box_plot8
Output <- TukeyHSD(aov(aov(Leaf.length..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot9 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Leaf.length..cm., colour = haplotype))
my_box_plot9 <- my_box_plot9 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot9 <- my_box_plot9 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot9 <- my_box_plot9 + theme(legend.position = "none")
my_box_plot9 <- my_box_plot9 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot9 <- my_box_plot9 + xlab("") + ylab("Leaf length (cm)")
my_box_plot9 <- my_box_plot9 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot9 <- my_box_plot9 + stat_pvalue_manual(test, label = "Tukey", y.position = 65)
my_box_plot9
Output <- TukeyHSD(aov(aov(DW..g. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "ab" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot10 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = DW..g., colour = haplotype))
my_box_plot10 <- my_box_plot10 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot10 <- my_box_plot10 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot10 <- my_box_plot10 + theme(legend.position = "none")
my_box_plot10 <- my_box_plot10 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot10 <- my_box_plot10 + xlab("") + ylab("Dry weight (g)")
my_box_plot10 <- my_box_plot10 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot10 <- my_box_plot10 + stat_pvalue_manual(test, label = "Tukey", y.position = 4)
my_box_plot10
Output <- TukeyHSD(aov(aov(Leaf.angle..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot11 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Leaf.angle..º., colour = haplotype))
my_box_plot11 <- my_box_plot11 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot11 <- my_box_plot11 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot11 <- my_box_plot11 + theme(legend.position = "none")
my_box_plot11 <- my_box_plot11 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot11 <- my_box_plot11 + xlab("") + ylab("Leaf angle (º)")
my_box_plot11 <- my_box_plot11 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot11 <- my_box_plot11 + stat_pvalue_manual(test, label = "Tukey", y.position = 65)
my_box_plot11
Output <- TukeyHSD(aov(aov(Tiller.angle..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap01
## "a" "ab" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot12 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Tiller.angle..º., colour = haplotype))
my_box_plot12 <- my_box_plot12 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot12 <- my_box_plot12 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot12 <- my_box_plot12 + theme(legend.position = "none")
my_box_plot12 <- my_box_plot12 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot12 <- my_box_plot12 + xlab("") + ylab("Tiller angle (º)")
my_box_plot12 <- my_box_plot12 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot12 <- my_box_plot12 + stat_pvalue_manual(test, label = "Tukey", y.position = 37)
my_box_plot12
Output <- TukeyHSD(aov(aov(Erectness..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot13 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Erectness..º., colour = haplotype))
my_box_plot13 <- my_box_plot13 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot13 <- my_box_plot13 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot13 <- my_box_plot13 + theme(legend.position = "none")
my_box_plot13 <- my_box_plot13 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot13 <- my_box_plot13 + xlab("") + ylab("Erectness (º)")
my_box_plot13 <- my_box_plot13 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot13 <- my_box_plot13 + stat_pvalue_manual(test, label = "Tukey", y.position = 165)
my_box_plot13
Output <- TukeyHSD(aov(aov(Droopiness..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap01
## "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot14 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Droopiness..º., colour = haplotype))
my_box_plot14 <- my_box_plot14 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot14 <- my_box_plot14 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot14 <- my_box_plot14 + theme(legend.position = "none")
my_box_plot14 <- my_box_plot14 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3"))
my_box_plot14 <- my_box_plot14 + xlab("") + ylab("Droopiness (º)")
my_box_plot14 <- my_box_plot14 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot14 <- my_box_plot14 + stat_pvalue_manual(test, label = "Tukey", y.position = 170)
my_box_plot14
Let’s combine all of these figures into one pdf:
pdf("Locus_D_Haplotypes_Os01t0810800.pdf", height = 30, width = 12)
plot_grid(my_box_plot, my_box_plot2, my_box_plot3, my_box_plot4, my_box_plot5, my_box_plot6, my_box_plot7, my_box_plot8,
my_box_plot9, my_box_plot10, my_box_plot11, my_box_plot12, my_box_plot13, my_box_plot14,
ncol=2,
align = "hv", labels=c("AUTO"),
label_size = 24)
dev.off()
## quartz_off_screen
## 2
Locus_K <- read.table("New_LK.GT.FORMAT", header=T)
LK_GT <- Locus_K
head(LK_GT)
then - we need to translate the 0/0 and 1/1 into A and T - as all the SNPs in here should be homozygous and bi-allelic
sapply(LK_GT$IRGC121316.c88dcbba.0, class)
## 0/0 0/0 0/0 0/0 0/0 0/0
## "character" "character" "character" "character" "character" "character"
## 0/0 0/0 0/0 0/0 0/0 0/0
## "character" "character" "character" "character" "character" "character"
head(LK_GT)
unique(LD_GT$IRGC121491)
## [1] "A" "NA" "T"
# current order is
# ./. 0/0 0/1 1/1
# and we need to change it into:
# "NA", "A", "N.A", "T" * mainly because we are not able to do haplotypes on biallelic SNPs - therefore they get classified as "NAs" too
# also - we cannot have two values converging into "NA", so we add "N.A", which we subsequently replace by "NA" in the last line
# therefore we loop it for all collumns:
for(e in 3:ncol(LK_GT)){
LK_GT[,e] <- gsub("0/0", "A", LK_GT[,e])
LK_GT[,e] <- gsub("1/1", "T", LK_GT[,e])
LK_GT[,e] <- gsub("./.", "NA", LK_GT[,e])
}
head(LK_GT)
All looks good! Now - we need to subset the SNP information based on the IDs present in the phenotype file:
pheno_id <- pheno2$VCF_better
unique(pheno_id)
## [1] "NSFTV81.006dfe9b.0" "NSFTV39.014ab0f1.0" "NSFTV132.02cc7c6d.0"
## [4] "NSFTV286.044639de.0" "NSFTV643.0491646d.0" "NSFTV32.068b860d.0"
## [7] "NSFTV358.0764178c.0" "NSFTV275.07dac217.0" "NSFTV269.092f15e7.0"
## [10] "NSFTV256.0be20a08.0" "NSFTV6.0d125c0e.0" "NSFTV324.0defa551.0"
## [13] "NSFTV287.0e787735.0" "NSFTV126.0f6a67da.0" "NSFTV110.11bf5114.0"
## [16] "NSFTV355.122a975b.0" "NSFTV150.128dd425.0" "NSFTV72.12d82364.0"
## [19] "NSFTV311.137628a5.0" "NSFTV235.13ae4f27.0" "NSFTV216.149fb038.0"
## [22] "NSFTV364.14e43b02.0" "NSFTV323.1510e4aa.0" "NSFTV277.1516d75f.0"
## [25] "NSFTV177.15e6c437.0" "NSFTV105.16463092.0" "NSFTV278.17627a92.0"
## [28] "NSFTV145.17c4070a.0" "NSFTV391.180a155f.0" "NSFTV253.1911c363.0"
## [31] "NSFTV163.192e24ab.0" "NSFTV228.195567bf.0" "NSFTV27.1956dd3f.0"
## [34] "NSFTV219.199f4455.0" "NSFTV138.1a946dc6.0" "NSFTV133.1a95985b.0"
## [37] "NSFTV65.1ab838a2.0" "NSFTV25.1ce7093b.0" "NSFTV335.1e26852c.0"
## [40] "NSFTV328.1e4f3933.0" "NSFTV396.1eb5d579.0" "NSFTV100.1f10be3d.0"
## [43] "NSFTV321.1f6f13c3.0" "NSFTV394.1f856ac1.0" "NSFTV94.20a4c97d.0"
## [46] "NSFTV49.21d3f1b3.0" "NSFTV333.22e60af9.0" "NSFTV380.238f25f1.0"
## [49] "NSFTV36.2593e5a7.0" "NSFTV201.263f2baf.0" "NSFTV377.2779bba9.0"
## [52] "NSFTV121.280279b3.0" "NSFTV241.2a7724c1.0" "NSFTV9.2a9d8d47.0"
## [55] "NSFTV162.2b28e441.0" "NSFTV33.2b4e06fe.0" "NSFTV306.2c583eed.0"
## [58] "NSFTV157.2db62b89.0" "NSFTV10.2e1c9c87.0" "NSFTV29.30646147.0"
## [61] "NSFTV8.30c3073f.0" "NSFTV221.311aaf30.0" "NSFTV144.3269952e.0"
## [64] "NSFTV85.328163e1.0" "NSFTV26.32a15c4d.0" "NSFTV96.32a6808e.0"
## [67] "NSFTV234.32ca1759.0" "NSFTV205.348853a5.0" "NSFTV214.34df4fca.0"
## [70] "NSFTV206.37249a74.0" "NSFTV67.39dd7feb.0" "NSFTV119.3aa51818.0"
## [73] "NSFTV385.3b25c24f.0" "NSFTV195.3c10a4b8.0" "NSFTV329.3e58f34c.0"
## [76] "NSFTV143.3ea144c5.0" "NSFTV318.3eb8fab5.0" "NSFTV255.3fb62843.0"
## [79] "NSFTV153.40f343f4.0" "NSFTV651.432387a8.0" "NSFTV17.446f6c62.0"
## [82] "NSFTV243.44aeff79.0" "NSFTV186.4559ff3f.0" "NSFTV397.45d3c920.0"
## [85] "NSFTV77.45ec7861.0" "NSFTV178.480f7505.0" "NSFTV147.48b4f132.0"
## [88] "NSFTV639.48d3bc66.0" "NSFTV273.493f2b7e.0" "NSFTV231.49649a00.0"
## [91] "NSFTV340.4b0a7350.0" "NSFTV345.4e35b58a.0" "NSFTV386.4f4f777a.0"
## [94] "NSFTV161.4f643bf5.0" "NSFTV239.523d5ec4.0" "NSFTV22.52c6e2ba.0"
## [97] "NSFTV112.531e23fa.0" "NSFTV226.536afc14.0" "NSFTV209.5384be83.0"
## [100] "NSFTV265.544f48c0.0" "NSFTV211.5497b233.0" "NSFTV179.5505a767.0"
## [103] "NSFTV5.5533f406.0" "NSFTV74.55d3afae.0" "NSFTV193.5632be21.0"
## [106] "NSFTV252.56f5103e.0" "NSFTV51.57d7feea.0" "NSFTV280.58d5bdc3.0"
## [109] "NSFTV308.593def74.0" "NSFTV282.5aa7968d.0" "NSFTV117.5b144b9c.0"
## [112] "NSFTV390.5c592759.0" "NSFTV92.5df8f871.0" "NSFTV3.5ef1be74.0"
## [115] "NSFTV244.5efe8622.0" "NSFTV258.60c362f1.0" "NSFTV187.60e142c3.0"
## [118] "NSFTV166.61152244.0" "NSFTV53.61b7bf53.0" "NSFTV71.61cc3858.0"
## [121] "NSFTV54.06334433.0" "NSFTV125.63f298ba.0" "NSFTV279.642238d9.0"
## [124] "NSFTV87.64fa0112.0" "NSFTV158.6516a500.0" "NSFTV56.652530bb.0"
## [127] "NSFTV13.660f0236.0" "NSFTV189.661eaeaa.0" "NSFTV78.66d97672.0"
## [130] "NSFTV270.6815ec6d.0" "NSFTV46.68c2ecf8.0" "NSFTV37.69527f35.0"
## [133] "NSFTV98.6ab77e3e.0" "NSFTV20.6af7c9fc.0" "NSFTV283.6b37cb3d.0"
## [136] "NSFTV116.6cedf6aa.0" "NSFTV102.6e26f4cc.0" "NSFTV360.6f068769.0"
## [139] "NSFTV240.6f263bd1.0" "NSFTV184.7119ebee.0" "NSFTV389.7126c359.0"
## [142] "NSFTV123.714ac141.0" "NSFTV118.71bd9426.0" "NSFTV57.71d5fcf4.0"
## [145] "NSFTV164.72497775.0" "NSFTV322.72cef953.0" "NSFTV366.73b20824.0"
## [148] "NSFTV303.73c4fdfb.0" "NSFTV90.743f41c1.0" "NSFTV315.74c9fbbc.0"
## [151] "NSFTV66.7691a376.0" "NSFTV128.76a1efc9.0" "NSFTV40.76f56677.0"
## [154] "NSFTV304.773e969e.0" "NSFTV23.7741d7c1.0" "NSFTV204.778cea6e.0"
## [157] "NSFTV113.7a723d9e.0" "NSFTV107.7b7a0d82.0" "NSFTV84.7bee5b9f.0"
## [160] "NSFTV222.7cfe4a0c.0" "NSFTV44.7d4c6c4e.0" "NSFTV274.7dba928e.0"
## [163] "NSFTV332.7e956f22.0" "NSFTV99.7eb7c6a8.0" "NSFTV60.7f7fb805.0"
## [166] "NSFTV289.7fc911f7.0" "NSFTV142.806c51cc.0" "NSFTV346.80fc89ae.0"
## [169] "NSFTV198.818143d8.0" "NSFTV4.81d03b86.0" "NSFTV305.8288d278.0"
## [172] "NSFTV371.84ad8457.0" "NSFTV347.853e318c.0" "NSFTV140.85551f9c.0"
## [175] "NSFTV281.85959164.0" "NSFTV218.85da3a70.0" "NSFTV104.8629f76c.0"
## [178] "NSFTV137.8653bbdb.0" "NSFTV301.86556239.0" "NSFTV313.872ec0b4.0"
## [181] "NSFTV249.87621330.0" "NSFTV641.88631879.0" "NSFTV250.88e1f162.0"
## [184] "NSFTV89.897c5eef.0" "NSFTV106.8a06320f.0" "NSFTV245.8a41a10a.0"
## [187] "NSFTV350.8adbe877.0" "NSFTV103.8c76404c.0" "NSFTV181.8cd35626.0"
## [190] "NSFTV176.8cfa8fb2.0" "NSFTV45.8d6aded4.0" "NSFTV120.8e6220e5.0"
## [193] "NSFTV167.8e941960.0" "NSFTV129.8fafd383.0" "NSFTV155.9127f236.0"
## [196] "NSFTV217.915c0033.0" "NSFTV156.93387d96.0" "NSFTV199.958ed737.0"
## [199] "NSFTV384.9764b3c8.0" "NSFTV191.97ce6839.0" "NSFTV73.97dcf87d.0"
## [202] "NSFTV334.09869491.0" "NSFTV642.986a3c37.0" "NSFTV172.9ac72c9e.0"
## [205] "NSFTV165.9b2223bb.0" "NSFTV248.9b37187d.0" "NSFTV285.9baa653c.0"
## [208] "NSFTV58.9c099b78.0" "NSFTV93.9f1f4614.0" "NSFTV101.9f782e9e.0"
## [211] "NSFTV341.9f786a86.0" "NSFTV387.a08839ce.0" "NSFTV344.a0f35768.0"
## [214] "NSFTV225.a31b4a06.0" "NSFTV284.a32e0586.0" "NSFTV348.a446becd.0"
## [217] "NSFTV349.a5a6e2dd.0" "NSFTV59.a5ec1b10.0" "NSFTV130.a796716d.0"
## [220] "NSFTV227.a7983b03.0" "NSFTV337.a7bec464.0" "NSFTV141.a8319fc6.0"
## [223] "NSFTV342.ac7a352c.0" "NSFTV267.aceb3352.0" "NSFTV638.ad1a9c05.0"
## [226] "NSFTV326.aeed4684.0" "NSFTV353.af77442b.0" "NSFTV75.b0816082.0"
## [229] "NSFTV242.b21f7528.0" "NSFTV213.b2320902.0" "NSFTV171.b238d197.0"
## [232] "NSFTV291.b29d3bf4.0" "NSFTV339.b3a301ea.0" "NSFTV268.b49ad0db.0"
## [235] "NSFTV343.b663b3f8.0" "NSFTV271.b67fd5f6.0" "NSFTV122.b6dc1bcc.0"
## [238] "NSFTV316.b828b757.0" "NSFTV317.b956dfb5.0" "NSFTV288.bc6b0af5.0"
## [241] "NSFTV652.bc9c6f80.0" "NSFTV139.bd0d322b.0" "NSFTV370.bd7aaa87.0"
## [244] "NSFTV83.becfe767.0" "NSFTV188.bf18cbae.0" "NSFTV190.bf7afed8.0"
## [247] "NSFTV260.bfa16661.0" "NSFTV183.c08a5ea2.0" "NSFTV79.c0936cf1.0"
## [250] "NSFTV307.c0d6eca3.0" "NSFTV69.c14c4e03.0" "NSFTV373.c20dfc59.0"
## [253] "NSFTV392.c4a397e5.0" "NSFTV232.c528bce0.0" "NSFTV35.c59435a8.0"
## [256] "NSFTV220.c5bf98cc.0" "NSFTV319.c7aeb39f.0" "NSFTV263.c7d2513e.0"
## [259] "NSFTV359.c86f37d0.0" "NSFTV200.c8a73fe7.0" "NSFTV185.c97be7ae.0"
## [262] "NSFTV76.cb5e38e6.0" "NSFTV257.cb9c18df.0" "NSFTV18.cbd6b346.0"
## [265] "NSFTV357.cc0ef8cf.0" "NSFTV202.ccdc6d39.0" "NSFTV261.ceb798a3.0"
## [268] "NSFTV320.cf44b628.0" "NSFTV196.cff871e8.0" "NSFTV325.d0392fe8.0"
## [271] "NSFTV131.d09d62e7.0" "NSFTV19.d22c8e33.0" "NSFTV31.d2da857d.0"
## [274] "NSFTV331.d438bfed.0" "NSFTV207.d628ec3a.0" "NSFTV290.d864377c.0"
## [277] "NSFTV368.db737b9b.0" "NSFTV237.dc2cf39b.0" "NSFTV369.dd2bfbfb.0"
## [280] "NSFTV30.dd6e755e.0" "NSFTV376.de696d95.0" "NSFTV247.de7ac7bf.0"
## [283] "NSFTV151.dee26171.0" "NSFTV41.dfb13733.0" "NSFTV154.dfdfb828.0"
## [286] "NSFTV24.e074229f.0" "NSFTV246.e0815776.0" "NSFTV363.e32c9d62.0"
## [289] "NSFTV108.e3b049a9.0" "NSFTV152.e4533c8b.0" "NSFTV43.e5951598.0"
## [292] "NSFTV367.e59cbfbe.0" "NSFTV208.e876c9ec.0" "NSFTV372.e8f708a5.0"
## [295] "NSFTV314.ea28671d.0" "NSFTV114.eac19fb8.0" "NSFTV236.eb17cda6.0"
## [298] "NSFTV636.ebb47c09.0" "NSFTV378.eca85a73.0" "NSFTV148.ed1beb9e.0"
## [301] "NSFTV55.ef8fd965.0" "NSFTV70.efbb93de.0" "NSFTV16.f0328a18.0"
## [304] "NSFTV356.f22d2dc6.0" "NSFTV365.f2e723fc.0" "NSFTV224.f2e9bed5.0"
## [307] "NSFTV264.f2f12364.0" "NSFTV262.f30e146b.0" "NSFTV635.f3d9c979.0"
## [310] "NSFTV203.f46f03bf.0" "NSFTV644.f66e275e.0" "NSFTV223.f73fa9e9.0"
## [313] "NSFTV266.f7da2506.0" "NSFTV180.f948d7b5.0" "NSFTV336.fa180a1e.0"
## [316] "NSFTV21.fa4c4111.0" "NSFTV327.fb2b3d82.0" "NSFTV310.fbae20bd.0"
## [319] "NSFTV276.fc26ce23.0" "NSFTV192.fdd98970.0" "NSFTV259.fe70ec33.0"
## [322] "NSFTV338.fed0b9a3.0" "NSFTV215.fed1dbae.0" "NSFTV395.ffde60f9.0"
## [325] "#N/A"
head(LK_GT)
# select only relevant SNP information
col.yes <- which(colnames(LK_GT) %in% pheno_id)
only_relev <- LK_GT[,col.yes]
head(only_relev)
dim(only_relev)
## [1] 12 324
Then - let’s add chromosome and position information
only_relev$Chr <- LK_GT$CHROM
only_relev$Pos <- LK_GT$POS
dim(only_relev)
## [1] 12 326
only_relev
LK_fin <- only_relev[,c(325:326,1:324)]
head(LK_fin)
write.csv(LK_fin, "Locus_K_final_file.csv", row.names = F)
Need to remove all of the NAs:
ttemp2 <- as.data.frame(LK_fin)
head(ttemp2)
# let's not select 1st two collumns:
ttemp3 <- ttemp2[,c(3:ncol(ttemp2))]
ttemp3
# now - let's transpose the shabbangle:
temp4 <- as.data.frame(t(ttemp3))
temp4
for(i in 1:nrow(temp4)){
temp4[i,] <- gsub("NA", NA, temp4[i,])
}
# get the accessions with NAs out of the data frame
nona_temp <- na.omit(temp4)
dim(nona_temp)
## [1] 233 12
head(nona_temp)
Now is the time to make a string for each genotype of the SNPs in the gene:
haplotype <- nona_temp %>%
unite("haplotype", V1:V12)
haplotype
haplotype$haplotype <- gsub("_", "", haplotype$haplotype)
haplotype$VCF_better <- rownames(haplotype)
haplotype
Finally - let’s fuse it into one file with phenotypes
colnames(pheno2)
## [1] "HDRA.ID" "NSFTV.ID" "IRGC.ID"
## [4] "Subpop" "Accession.Name" "VCF_code"
## [7] "VCF_better" "index.pots" "Variety"
## [10] "Shoot.area..cm2." "Hull.area..cm2." "Solidity"
## [13] "Perimeter..cm." "Nr.leaves" "Nr.tillers"
## [16] "Plant.height..cm." "Culm.height..cm." "Leaf.length..cm."
## [19] "DW..g." "Leaf.angle..º." "Tiller.angle..º."
## [22] "Erectness..º." "Droopiness..º." "SUM_rank_traits"
## [25] "SUM_norm_traits" "norm_SI_rank" "SI_rank"
pheno_temp <- pheno2[,c(7,9:27)]
haplotype2 <- as.data.frame(haplotype)
ultimate <- merge(pheno_temp, haplotype2, by=c("VCF_better"))
head(ultimate)
Now - let’s check if we can make some graphs! :) Exciting :)
First - we need to examine which haplotypes are represented by most accessions and which by less than 3 accessions (they will be excluded):
head(ultimate)
unique(ultimate$haplotype)
## [1] "AAAAAAAAAAAA" "AAAATAAAAAAA" "ATAAATATAAAA" "ATATATATAAAA" "ATAAATATTAAA"
## [6] "ATAAATTTAATA" "ATAATTATAAAA" "ATATTTATAAAA" "ATAAATATAAAT" "ATAAATTTAAAA"
## [11] "AAAAAAAAAAAT" "ATATATATAAAT"
# let's see which haplotype is most popular among accessions:
ultimate %>% group_by(haplotype) %>%
tally()
So - we have MANY unique haplotypes and only three haplotypes represented by three or more accessions: - AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA - most popular
So let’s include the haplotypes represented by at least three accessions:
get_lost <- c("ATAAATATAAAT", "ATAAATTTAAAA", "ATAATTATAAAA", "ATATATATAAAT")
ultimate2 <- subset(ultimate, !(ultimate$haplotype %in% get_lost))
unique(ultimate2$haplotype)
## [1] "AAAAAAAAAAAA" "AAAATAAAAAAA" "ATAAATATAAAA" "ATATATATAAAA" "ATAAATATTAAA"
## [6] "ATAAATTTAATA" "ATATTTATAAAA" "AAAAAAAAAAAT"
write.csv(ultimate, "LK_Haplotype_Os12t0557800-01_gene_and_pheno.csv", row.names = F)
ultimate2 %>% group_by(haplotype) %>%
tally()
ultimate2$haplotype <- gsub("AAAAAAAAAAAA", "hap01", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATATATATAAAA", "hap02", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATAAATATTAAA", "hap03", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATAAATATAAAA", "hap04", ultimate2$haplotype)
ultimate2$haplotype <- gsub("AAAATAAAAAAA", "hap05", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATATTTATAAAA", "hap06", ultimate2$haplotype)
ultimate2$haplotype <- gsub("AAAAAAAAAAAT", "hap07", ultimate2$haplotype)
ultimate2$haplotype <- gsub("ATAAATTTAATA", "hap08", ultimate2$haplotype)
ultimate2$haplotype <- factor(ultimate2$haplotype, levels = c("hap01", "hap02", "hap03", "hap04", "hap05", "hap06", "hap07", "hap08"))
colnames(ultimate2)
## [1] "VCF_better" "Variety" "Shoot.area..cm2."
## [4] "Hull.area..cm2." "Solidity" "Perimeter..cm."
## [7] "Nr.leaves" "Nr.tillers" "Plant.height..cm."
## [10] "Culm.height..cm." "Leaf.length..cm." "DW..g."
## [13] "Leaf.angle..º." "Tiller.angle..º." "Erectness..º."
## [16] "Droopiness..º." "SUM_rank_traits" "SUM_norm_traits"
## [19] "norm_SI_rank" "SI_rank" "haplotype"
ultimate2$Leaf.angle..º. <- as.numeric(ultimate2$Leaf.angle..º.)
ultimate2$Tiller.angle..º. <- as.numeric(ultimate2$Tiller.angle..º.)
ultimate2$Erectness..º. <- as.numeric(ultimate2$Erectness..º.)
ultimate2$Droopiness..º. <- as.numeric(ultimate2$Droopiness..º.)
Now - we can start plotting and examining whether haplotype groups show significant differences - we will compare to the most abundant haplotype
Output <- TukeyHSD(aov(aov(Shoot.area..cm2. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "a" "a" "ab" "a" "b" "a" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
test
my_box_plot <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Shoot.area..cm2., colour = haplotype))
#my_box_plot <- my_box_plot + geom_boxplot()
my_box_plot <- my_box_plot + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot <- my_box_plot + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot <- my_box_plot + theme(legend.position = "none")
my_box_plot <- my_box_plot + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot <- my_box_plot + xlab("") + ylab(expression(paste("Shoot area (", cm^2, ")", sep = "")))
my_box_plot <- my_box_plot + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot <- my_box_plot + stat_pvalue_manual(test, label = "Tukey", y.position = 950)
my_box_plot
OK - let’s move on to other phenotypes:
Output <- TukeyHSD(aov(aov(Hull.area..cm2. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "a" "a" "ab" "a" "b" "a" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot2 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Hull.area..cm2., colour = haplotype))
my_box_plot2 <- my_box_plot2 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot2 <- my_box_plot2 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot2 <- my_box_plot2 + theme(legend.position = "none")
my_box_plot2 <- my_box_plot2 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot2 <- my_box_plot2 + xlab("") + ylab(expression(paste("Hull area (", cm^2, ")", sep = "")))
my_box_plot2 <- my_box_plot2 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot2 <- my_box_plot2 + stat_pvalue_manual(test, label = "Tukey", y.position = 6000)
my_box_plot2
Output <- TukeyHSD(aov(aov(Solidity ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "a" "a" "a" "a" "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap04 TRUE
## hap05 TRUE
## hap06 TRUE
## hap07 TRUE
## hap08 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot3 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Solidity, colour = haplotype))
my_box_plot3 <- my_box_plot3 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot3 <- my_box_plot3 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot3 <- my_box_plot3 + theme(legend.position = "none")
my_box_plot3 <- my_box_plot3 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot3 <- my_box_plot3 + xlab("") + ylab("Solidity (a.u.)")
my_box_plot3 <- my_box_plot3 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot3 <- my_box_plot3 + stat_pvalue_manual(test, label = "Tukey", y.position = 0.21)
my_box_plot3
Output <- TukeyHSD(aov(aov(Perimeter..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "bc" "b" "acd" "abc" "ad" "abcd" "d"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot4 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Perimeter..cm., colour = haplotype))
my_box_plot4 <- my_box_plot4 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot4 <- my_box_plot4 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot4 <- my_box_plot4 + theme(legend.position = "none")
my_box_plot4 <- my_box_plot4 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot4 <- my_box_plot4 + xlab("") + ylab("Perimeter (cm)")
my_box_plot4 <- my_box_plot4 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot4 <- my_box_plot4 + stat_pvalue_manual(test, label = "Tukey", y.position = 2500)
my_box_plot4
Output <- TukeyHSD(aov(aov(Nr.leaves ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "ab" "c" "c" "abd" "abc" "ad" "bc" "d"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot5 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Nr.leaves, colour = haplotype))
my_box_plot5 <- my_box_plot5 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot5 <- my_box_plot5 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot5 <- my_box_plot5 + theme(legend.position = "none")
my_box_plot5 <- my_box_plot5 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot5 <- my_box_plot5 + xlab("") + ylab("Leaf number")
my_box_plot5 <- my_box_plot5 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot5 <- my_box_plot5 + stat_pvalue_manual(test, label = "Tukey", y.position = 55)
my_box_plot5
Output <- TukeyHSD(aov(aov(Nr.tillers ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "b" "b" "ac" "abc" "ac" "ab" "c"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot6 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Nr.tillers, colour = haplotype))
my_box_plot6 <- my_box_plot6 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot6 <- my_box_plot6 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot6 <- my_box_plot6 + theme(legend.position = "none")
my_box_plot6 <- my_box_plot6 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot6 <- my_box_plot6 + xlab("") + ylab("Tiller number")
my_box_plot6 <- my_box_plot6 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot6 <- my_box_plot6 + stat_pvalue_manual(test, label = "Tukey", y.position = 17)
my_box_plot6
Output <- TukeyHSD(aov(aov(Plant.height..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "ab" "a" "a" "a" "b" "a" "a"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot7 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Plant.height..cm., colour = haplotype))
my_box_plot7 <- my_box_plot7 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot7 <- my_box_plot7 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot7 <- my_box_plot7 + theme(legend.position = "none")
my_box_plot7 <- my_box_plot7 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot7 <- my_box_plot7 + xlab("") + ylab("Plant height (cm)")
my_box_plot7 <- my_box_plot7 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot7 <- my_box_plot7 + stat_pvalue_manual(test, label = "Tukey", y.position = 95)
my_box_plot7
Output <- TukeyHSD(aov(aov(Culm.height..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "ab" "a" "ab" "ab" "ab" "ab" "ab" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot8 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Culm.height..cm., colour = haplotype))
my_box_plot8 <- my_box_plot8 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot8 <- my_box_plot8 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot8 <- my_box_plot8 + theme(legend.position = "none")
my_box_plot8 <- my_box_plot8 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot8 <- my_box_plot8 + xlab("") + ylab("Culm height (cm)")
my_box_plot8 <- my_box_plot8 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot8 <- my_box_plot8 + stat_pvalue_manual(test, label = "Tukey", y.position = 35)
my_box_plot8
Output <- TukeyHSD(aov(aov(Leaf.length..cm. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "ab" "a" "ab" "a" "b" "a" "ab"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot9 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Leaf.length..cm., colour = haplotype))
my_box_plot9 <- my_box_plot9 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot9 <- my_box_plot9 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot9 <- my_box_plot9 + theme(legend.position = "none")
my_box_plot9 <- my_box_plot9 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot9 <- my_box_plot9 + xlab("") + ylab("Leaf length (cm)")
my_box_plot9 <- my_box_plot9 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot9 <- my_box_plot9 + stat_pvalue_manual(test, label = "Tukey", y.position = 65)
my_box_plot9
Output <- TukeyHSD(aov(aov(DW..g. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "ab" "b" "ac" "ab" "c" "ab" "c"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot10 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = DW..g., colour = haplotype))
my_box_plot10 <- my_box_plot10 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot10 <- my_box_plot10 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot10 <- my_box_plot10 + theme(legend.position = "none")
my_box_plot10 <- my_box_plot10 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot10 <- my_box_plot10 + xlab("") + ylab("Dry weight (g)")
my_box_plot10 <- my_box_plot10 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot10 <- my_box_plot10 + stat_pvalue_manual(test, label = "Tukey", y.position = 4)
my_box_plot10
Output <- TukeyHSD(aov(aov(Leaf.angle..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## $Letters
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "a" "a" "a" "a" "a" "a" "a"
##
## $LetterMatrix
## a
## hap02 TRUE
## hap03 TRUE
## hap04 TRUE
## hap05 TRUE
## hap06 TRUE
## hap07 TRUE
## hap08 TRUE
## hap01 TRUE
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot11 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Leaf.angle..º., colour = haplotype))
my_box_plot11 <- my_box_plot11 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot11 <- my_box_plot11 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot11 <- my_box_plot11 + theme(legend.position = "none")
my_box_plot11 <- my_box_plot11 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot11 <- my_box_plot11 + xlab("") + ylab("Leaf angle (º)")
my_box_plot11 <- my_box_plot11 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot11 <- my_box_plot11 + stat_pvalue_manual(test, label = "Tukey", y.position = 65)
my_box_plot11
Output <- TukeyHSD(aov(aov(Tiller.angle..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "ab" "b" "abc" "ac" "ac" "abc" "c"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot12 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Tiller.angle..º., colour = haplotype))
my_box_plot12 <- my_box_plot12 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot12 <- my_box_plot12 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot12 <- my_box_plot12 + theme(legend.position = "none")
my_box_plot12 <- my_box_plot12 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot12 <- my_box_plot12 + xlab("") + ylab("Tiller angle (º)")
my_box_plot12 <- my_box_plot12 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot12 <- my_box_plot12 + stat_pvalue_manual(test, label = "Tukey", y.position = 37)
my_box_plot12
Output <- TukeyHSD(aov(aov(Erectness..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "ab" "ab" "ab" "ab" "ab" "ab" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot13 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Erectness..º., colour = haplotype))
my_box_plot13 <- my_box_plot13 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot13 <- my_box_plot13 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot13 <- my_box_plot13 + theme(legend.position = "none")
my_box_plot13 <- my_box_plot13 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot13 <- my_box_plot13 + xlab("") + ylab("Erectness (º)")
my_box_plot13 <- my_box_plot13 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot13 <- my_box_plot13 + stat_pvalue_manual(test, label = "Tukey", y.position = 165)
my_box_plot13
Output <- TukeyHSD(aov(aov(Droopiness..º. ~ haplotype, data = ultimate2)))
P4 = Output$haplotype[,'p adj']
stat.test<- multcompLetters(P4)
stat.test
## hap02 hap03 hap04 hap05 hap06 hap07 hap08 hap01
## "a" "ab" "ab" "ab" "ab" "ab" "ab" "b"
test <- as.data.frame(stat.test$Letters)
test$group1 <- rownames(test)
test$group2 <- rownames(test)
colnames(test)[1] <- "Tukey"
my_box_plot14 <- ggplot(data = ultimate2, mapping = aes(x = haplotype, y = Droopiness..º., colour = haplotype))
my_box_plot14 <- my_box_plot14 + geom_beeswarm(alpha=0.6, priority = "density")
my_box_plot14 <- my_box_plot14 + stat_summary(fun.y=mean, geom="point", shape=95, size=6, color="black", fill="black")
## Warning: `fun.y` is deprecated. Use `fun` instead.
my_box_plot14 <- my_box_plot14 + theme(legend.position = "none")
my_box_plot14 <- my_box_plot14 + scale_colour_manual(values = c("steelblue", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3", "firebrick3"))
my_box_plot14 <- my_box_plot14 + xlab("") + ylab("Droopiness (º)")
my_box_plot14 <- my_box_plot14 + theme(axis.text.x = element_text(angle=90, hjust=0.9, vjust=0.5))
my_box_plot14 <- my_box_plot14 + stat_pvalue_manual(test, label = "Tukey", y.position = 170)
my_box_plot14
Let’s combine all of these figures into one pdf:
pdf("Locus_K_Haplotypes_Os12t0557800-0.pdf", height = 30, width = 12)
plot_grid(my_box_plot, my_box_plot2, my_box_plot3, my_box_plot4, my_box_plot5, my_box_plot6, my_box_plot7, my_box_plot8,
my_box_plot9, my_box_plot10, my_box_plot11, my_box_plot12, my_box_plot13, my_box_plot14,
ncol=2,
align = "hv", labels=c("AUTO"),
label_size = 24)
dev.off()
## quartz_off_screen
## 2