Load stuff
## Warning: package 'cowplot' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.4
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, last
## Warning: package 'ggmap' was built under R version 3.2.3
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:cowplot':
##
## theme_nothing
Get phenotype, pedigree and genotype data
download.file("https://gist.githubusercontent.com/rossibarra/96ab9d5bc4c13fdcad3a4b189c6215c5/raw/dd69c989915ced1181479845c18da676e313e06e/ab10_phenos","ab10_phenos.txt")
download.file("https://ndownloader.figshare.com/files/5343103?private_link=b16f322f87fbfbcf114b","chr10_v2_gbs2.7_ab10.hmp.txt.gz")
download.file("https://gist.githubusercontent.com/rossibarra/e106872ae538feae6de4bd586b820317/raw/e1c36325395d726ad46a6407445e9ebe3d43a012/ab10_pedigrees","ab10_pedigree.txt")
Using Tassel 5.2.16 to identify markers associated with the R1 locus (tightly linked to Ab10).
Impute
#Note to self: doesn't work beyond Mb148. Not sure why? Those SNPs filtered out?
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -h chr10_v2_gbs2.7_ab10.hmp.txt.gz -filterAlign -filterAlignMinCount 10 -filterAlignMinFreq 0.01 -filterAlignMaxFreq 0.99 -FSFHapImputationPlugin -pedigrees ab10_pedigree.txt -minHap 2 -windowLD true -phet 0.15 -bc false -maxMissing 0.6 -minMaf 0.01 -logfile logfile.txt -endPLugin -export impute_chr10")
Join imputed datasets and do PCA
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h impute_chr103.hmp.txt -fork2 -h impute_chr104.hmp.txt -combine3 -input1 -input2 -mergeGenotypeTables -filterAlign -filterAlignStartPos 1 -filterAlignEndPos 138000000 -PrincipalComponentsPlugin -covariance true -ncomponents 3 -endPlugin -export PCall10 -runfork1 -runfork2")
Filter for end, impute, export to new joined file
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h impute_chr103.hmp.txt -fork2 -h impute_chr104.hmp.txt -combine3 -input1 -input2 -mergeGenotypeTables -filterAlign -filterAlignStartPos 138000000 -filterAlignEndPos 151000000 -runfork1 -runfork2 -export combined_end10")
Join PCA with phenotype file and genotype to run simple GLM with PC’s as covariates
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h combined_end10.hmp.txt -fork2 -importGuess ab10_phenos.txt -fork3 -importGuess PCall101.txt -combineA -input1 -input2 -input3 -intersect -FixedEffectLMPlugin -saveToFile true -siteFile sites.txt -alleleFile alleles.txt -endPlugin -runfork1 -runfork2 -runfork3")
sites_glm<-read.table("sites.txt",header=T)
alleles_glm<-read.table("alleles.txt",header=T)
all_glm=merge(sites_glm,alleles_glm,by=c("Marker","Trait")) %>%
mutate(Pos=Pos.x/1E6,Chr=Chr.x,p=-1*log10(p+10^-100)) %>%
select(-c(Chr.x,Pos.x,Chr.y,Pos.y))
Plot -log10 p-value for each trait, size is \(R^2\), red line is R1 locus
ggplot(all_glm,aes(x=Pos,y=p,color=Trait,size=marker_Rsq))+
ylab("-log10 p")+xlab("Mb")+geom_point(alpha=0.5)+
scale_color_manual(values=cbPalette[2:3])+
geom_vline(xintercept=138.4, linetype="dashed", color = "red", size=1)
## Warning: Removed 176 rows containing missing values (geom_point).
Find marker with best \(R^2\), find out information on these alleles.
For R. Tons of markers. S10_147009651 C is N10, A is Ab10
filter(all_glm,marker_Rsq>0.5,marker_Rsq != "Inf",Trait=="R") %>%
select(Marker,Pos,p,marker_Rsq,Obs,Allele) %>% arrange(desc(marker_Rsq))
## Marker Pos p marker_Rsq Obs Allele
## 1 S10_140958484 140.9585 12.67330 0.5534540 20 C
## 2 S10_140958484 140.9585 12.67330 0.5534540 31 Y
## 3 S10_140958484 140.9585 12.67330 0.5534540 8 T
## 4 S10_144679393 144.6794 12.07764 0.5466994 18 G
## 5 S10_144679393 144.6794 12.07764 0.5466994 31 K
## 6 S10_144679393 144.6794 12.07764 0.5466994 8 T
## 7 S10_144679394 144.6794 12.07764 0.5466994 18 T
## 8 S10_144679394 144.6794 12.07764 0.5466994 31 Y
## 9 S10_144679394 144.6794 12.07764 0.5466994 8 C
## 10 S10_144679414 144.6794 12.07764 0.5466994 18 G
## 11 S10_144679414 144.6794 12.07764 0.5466994 31 S
## 12 S10_144679414 144.6794 12.07764 0.5466994 8 C
## 13 S10_139877027 139.8770 24.27655 0.5422498 14 G
## 14 S10_139877027 139.8770 24.27655 0.5422498 15 T
## 15 S10_139877027 139.8770 24.27655 0.5422498 53 K
## 16 S10_141962838 141.9628 12.29636 0.5392099 8 G
## 17 S10_141962838 141.9628 12.29636 0.5392099 15 C
## 18 S10_141962838 141.9628 12.29636 0.5392099 31 R
## 19 S10_141962838 141.9628 12.29636 0.5392099 8 A
## 20 S10_143063621 143.0636 13.22369 0.5391788 23 C
## 21 S10_143063621 143.0636 13.22369 0.5391788 31 S
## 22 S10_143063621 143.0636 13.22369 0.5391788 8 G
## 23 S10_143247688 143.2477 13.22369 0.5391788 23 G
## 24 S10_143247688 143.2477 13.22369 0.5391788 31 R
## 25 S10_143247688 143.2477 13.22369 0.5391788 8 A
## 26 S10_143716234 143.7162 13.22369 0.5391788 23 A
## 27 S10_143716234 143.7162 13.22369 0.5391788 31 R
## 28 S10_143716234 143.7162 13.22369 0.5391788 8 G
## 29 S10_144312096 144.3121 13.22369 0.5391788 23 A
## 30 S10_144312096 144.3121 13.22369 0.5391788 31 R
## 31 S10_144312096 144.3121 13.22369 0.5391788 8 G
## 32 S10_144462515 144.4625 13.22369 0.5391788 23 C
## 33 S10_144462515 144.4625 13.22369 0.5391788 31 Y
## 34 S10_144462515 144.4625 13.22369 0.5391788 8 T
## 35 S10_145611012 145.6110 13.22369 0.5391788 23 G
## 36 S10_145611012 145.6110 13.22369 0.5391788 31 K
## 37 S10_145611012 145.6110 13.22369 0.5391788 8 T
## 38 S10_146569798 146.5698 13.22369 0.5391788 23 T
## 39 S10_146569798 146.5698 13.22369 0.5391788 31 K
## 40 S10_146569798 146.5698 13.22369 0.5391788 8 G
## 41 S10_146569799 146.5698 13.22369 0.5391788 23 C
## 42 S10_146569799 146.5698 13.22369 0.5391788 31 Y
## 43 S10_146569799 146.5698 13.22369 0.5391788 8 T
## 44 S10_147049501 147.0495 13.22369 0.5391788 23 C
## 45 S10_147049501 147.0495 13.22369 0.5391788 31 Y
## 46 S10_147049501 147.0495 13.22369 0.5391788 8 T
## 47 S10_147123126 147.1231 13.22369 0.5391788 23 A
## 48 S10_147123126 147.1231 13.22369 0.5391788 31 R
## 49 S10_147123126 147.1231 13.22369 0.5391788 8 G
## 50 S10_148410465 148.4105 13.22369 0.5391788 23 G
## 51 S10_148410465 148.4105 13.22369 0.5391788 31 R
## 52 S10_148410465 148.4105 13.22369 0.5391788 8 A
## 53 S10_148410466 148.4105 13.22369 0.5391788 23 C
## 54 S10_148410466 148.4105 13.22369 0.5391788 31 S
## 55 S10_148410466 148.4105 13.22369 0.5391788 8 G
## 56 S10_148410468 148.4105 13.22369 0.5391788 23 G
## 57 S10_148410468 148.4105 13.22369 0.5391788 31 K
## 58 S10_148410468 148.4105 13.22369 0.5391788 8 T
## 59 S10_148577979 148.5780 13.22369 0.5391788 23 C
## 60 S10_148577979 148.5780 13.22369 0.5391788 31 Y
## 61 S10_148577979 148.5780 13.22369 0.5391788 8 T
## 62 S10_148817784 148.8178 13.22369 0.5391788 23 T
## 63 S10_148817784 148.8178 13.22369 0.5391788 31 Y
## 64 S10_148817784 148.8178 13.22369 0.5391788 8 C
## 65 S10_149029683 149.0297 13.22369 0.5391788 23 G
## 66 S10_149029683 149.0297 13.22369 0.5391788 31 0
## 67 S10_149029683 149.0297 13.22369 0.5391788 8 -
## 68 S10_141500118 141.5001 10.87162 0.5297516 14 G
## 69 S10_141500118 141.5001 10.87162 0.5297516 31 S
## 70 S10_141500118 141.5001 10.87162 0.5297516 8 C
## 71 S10_142367974 142.3680 10.20304 0.5146448 8 A
## 72 S10_142367974 142.3680 10.20304 0.5146448 31 R
## 73 S10_142367974 142.3680 10.20304 0.5146448 15 G
For Ab10-I vs Ab10-II. In spite of below S10_141959097 is best. Ab10-2 has a G, all other (N10 and Ab10-I has an A)
filter(all_glm,marker_Rsq>0.4,marker_Rsq != "Inf",Trait=="type") %>%
select(Marker,Pos,p,marker_Rsq,Obs,Allele) %>% arrange(desc(marker_Rsq))
## Marker Pos p marker_Rsq Obs Allele
## 1 S10_147572004 147.5720 2.378086 0.4620195 12 G
## 2 S10_147572004 147.5720 2.378086 0.4620195 1 -
## 3 S10_142365006 142.3650 9.456322 0.4225598 12 C
## 4 S10_142365006 142.3650 9.456322 0.4225598 15 A
## 5 S10_142365006 142.3650 9.456322 0.4225598 22 M
## 6 S10_142365008 142.3650 9.456322 0.4225598 12 A
## 7 S10_142365008 142.3650 9.456322 0.4225598 15 G
## 8 S10_142365008 142.3650 9.456322 0.4225598 22 R
## 9 S10_142365016 142.3650 9.456322 0.4225598 12 T
## 10 S10_142365016 142.3650 9.456322 0.4225598 15 A
## 11 S10_142365016 142.3650 9.456322 0.4225598 22 W
## 12 S10_148432972 148.4330 9.264924 0.4158316 10 T
## 13 S10_148432972 148.4330 9.264924 0.4158316 16 A
## 14 S10_148432972 148.4330 9.264924 0.4158316 22 W
## 15 S10_148432979 148.4330 9.264924 0.4158316 10 A
## 16 S10_148432979 148.4330 9.264924 0.4158316 16 T
## 17 S10_148432979 148.4330 9.264924 0.4158316 22 W
## 18 S10_138484548 138.4845 9.962950 0.4130784 15 G
## 19 S10_138484548 138.4845 9.962950 0.4130784 15 A
## 20 S10_138484548 138.4845 9.962950 0.4130784 22 R
## 21 S10_141959360 141.9594 8.271540 0.4111276 8 T
## 22 S10_141959360 141.9594 8.271540 0.4111276 31 0
## 23 S10_141959360 141.9594 8.271540 0.4111276 8 +
## 24 S10_141959361 141.9594 8.271540 0.4111276 8 G
## 25 S10_141959361 141.9594 8.271540 0.4111276 31 0
## 26 S10_141959361 141.9594 8.271540 0.4111276 8 +
## 27 S10_141959367 141.9594 8.271540 0.4111276 8 C
## 28 S10_141959367 141.9594 8.271540 0.4111276 31 0
## 29 S10_141959367 141.9594 8.271540 0.4111276 8 +
## 30 S10_141959369 141.9594 8.271540 0.4111276 8 A
## 31 S10_141959369 141.9594 8.271540 0.4111276 31 0
## 32 S10_141959369 141.9594 8.271540 0.4111276 8 +
## 33 S10_142331721 142.3317 8.271540 0.4111276 8 T
## 34 S10_142331721 142.3317 8.271540 0.4111276 31 Y
## 35 S10_142331721 142.3317 8.271540 0.4111276 8 C
## 36 S10_145284375 145.2844 8.271540 0.4111276 8 A
## 37 S10_145284375 145.2844 8.271540 0.4111276 31 M
## 38 S10_145284375 145.2844 8.271540 0.4111276 8 C
## 39 S10_145284381 145.2844 8.271540 0.4111276 8 C
## 40 S10_145284381 145.2844 8.271540 0.4111276 31 0
## 41 S10_145284381 145.2844 8.271540 0.4111276 8 -
## 42 S10_141835607 141.8356 10.533010 0.4052642 18 G
## 43 S10_141835607 141.8356 10.533010 0.4052642 15 T
## 44 S10_141835607 141.8356 10.533010 0.4052642 22 K
## 45 S10_141835646 141.8356 10.533010 0.4052642 18 G
## 46 S10_141835646 141.8356 10.533010 0.4052642 15 A
## 47 S10_141835646 141.8356 10.533010 0.4052642 22 R
## 48 S10_145279161 145.2792 9.887381 0.4038456 13 G
## 49 S10_145279161 145.2792 9.887381 0.4038456 15 A
## 50 S10_145279161 145.2792 9.887381 0.4038456 22 R
## 51 S10_141478608 141.4786 10.660575 0.4033787 10 A
## 52 S10_141478608 141.4786 10.660575 0.4033787 23 C
## 53 S10_141478608 141.4786 10.660575 0.4033787 22 M
## 54 S10_141478637 141.4786 10.660575 0.4033787 10 T
## 55 S10_141478637 141.4786 10.660575 0.4033787 23 C
## 56 S10_141478637 141.4786 10.660575 0.4033787 22 Y
## 57 S10_143716868 143.7169 10.660575 0.4033787 10 T
## 58 S10_143716868 143.7169 10.660575 0.4033787 23 C
## 59 S10_143716868 143.7169 10.660575 0.4033787 22 Y
## 60 S10_143716869 143.7169 10.660575 0.4033787 10 C
## 61 S10_143716869 143.7169 10.660575 0.4033787 23 -
## 62 S10_143716869 143.7169 10.660575 0.4033787 22 0
## 63 S10_143716917 143.7169 10.660575 0.4033787 10 C
## 64 S10_143716917 143.7169 10.660575 0.4033787 23 T
## 65 S10_143716917 143.7169 10.660575 0.4033787 22 Y
## 66 S10_144739272 144.7393 11.284108 0.4003194 21 G
## 67 S10_144739272 144.7393 11.284108 0.4003194 15 C
## 68 S10_144739272 144.7393 11.284108 0.4003194 22 S
## 69 S10_144739284 144.7393 11.284108 0.4003194 21 T
## 70 S10_144739284 144.7393 11.284108 0.4003194 15 C
## 71 S10_144739284 144.7393 11.284108 0.4003194 22 Y
## 72 S10_144739296 144.7393 11.284108 0.4003194 21 T
## 73 S10_144739296 144.7393 11.284108 0.4003194 15 A
## 74 S10_144739296 144.7393 11.284108 0.4003194 22 W
Join imputed datasets and do PCA
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h combined_end10.hmp.txt -filterAlign -filterAlignStartPos 148000000 -filterAlignEndPos 149000000 -PrincipalComponentsPlugin -covariance true -ncomponents 3 -endPlugin -export PCinvert -runfork1 ")
Plot. Shows 3 clusters suggesting inversion, and PC2 presumable separates Ab10-1 and Ab10-2. Maybe real difference, but may be due to fact they were imputed separately. Value of 3 here is N10
endinversion<-read.table("PCinvert1.txt",skip=2,header=T) %>% mutate(ab10type=ifelse(grepl("r",Taxa),3,ifelse(grepl("AB10II",Taxa),2,ifelse(grepl("AB10I",Taxa),1,ifelse(grepl("W23",Taxa),1,ifelse(grepl("AB20I",Taxa),1,99)))))) %>% mutate(ab10type=factor(ab10type))
ggplot(endinversion,aes(y=PC2,x=PC1,color=factor(ab10type)))+geom_point(,alpha=0.5,size=3)
Read in ch10 palmar chico
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -h teo_chrom10.hmp.txt.gz -filterAlign -filterAlignStartPos 144000000 -filterAlignEndPos 151000000 -export palmarchico_end10 -exportType Table" )
Get data on mom/dad for each offspring:
download.file("https://gist.githubusercontent.com/rossibarra/494112881b623c80888606321dbe6364/raw/3e9045295f1304ad3ca4b64d5f354e939fa4c92b/mom_dad_ab10_palmar_chico","momdad.txt")
momdad<-read.table("momdad.txt",header=T)
Going with S10_144679393 since that’s the top marker. Get dad, modify taxa names, join with parentage data.
pchico<-data.frame(fread("palmarchico_end10.txt"))
colnames(pchico)=sapply(pchico[1,],function(x) paste("x",x,sep=""))
pchico=pchico[-1,]
taxa=as.vector(sapply(pchico$xTaxa,function(x) strsplit(x,":")[[1]][1]))
pchico_new=cbind(taxa,pchico) %>%
select(taxa,x144679393)
pchico_new$taxa=str_replace(pchico_new$taxa,"_[123]","") %>%
str_replace("_mrg","")
palmarchico<-merge(pchico_new,momdad,by.x="taxa",by.y="Plot") %>%
mutate(Ab10=ifelse(x144679393=="K",1,ifelse(x144679393=="T",2,0)))
Maternal in self progeny
moms=filter(palmarchico,x144679393!="N", Cross=="self") %>%
group_by(GBSMom) %>%
summarize(ab10_alleles=sum(Ab10),kids=n()) %>%
mutate(frac=ab10_alleles/(2*kids)) %>%
filter(frac>0) %>%
arrange(desc(frac)) %>%
data.frame()
moms
## GBSMom ab10_alleles kids frac
## 1 PC_J10_ID1 2 1 1.00000000
## 2 PC_J12_ID1 12 6 1.00000000
## 3 PC_J50_ID1 20 10 1.00000000
## 4 PC_L12_ID2 30 15 1.00000000
## 5 PC_N11_ID1 2 1 1.00000000
## 6 PC_N13_ID1 14 7 1.00000000
## 7 PC_N14_ID2 6 3 1.00000000
## 8 PC_I50_ID2 22 12 0.91666667
## 9 PC_I11_ID2 31 19 0.81578947
## 10 PC_M15_ID1 20 14 0.71428571
## 11 PC_N48_ID1 21 15 0.70000000
## 12 PC_O59_ID2 25 21 0.59523810
## 13 PC_N09_ID1 8 7 0.57142857
## 14 PC_L56_ID1 27 25 0.54000000
## 15 PC_I05_ID1 6 6 0.50000000
## 16 PC_I58_ID1 6 6 0.50000000
## 17 PC_I06_ID1 1 2 0.25000000
## 18 PC_L48_ID2 2 20 0.05000000
## 19 PC_N57_ID2 2 27 0.03703704
What’s the mean (weighted by kids) percent transmission of Ab10?
moms=filter(moms,kids>5) %>% mutate(wfrac=frac*kids)
sum(moms$wfrac)/sum(moms$kids)
## [1] 0.5857143
Paternal in outcross progeny
dads=filter(palmarchico,x144679393!="N", Cross=="outcross") %>%
group_by(GBSDad) %>%
summarize(ab10_alleles=sum(Ab10),kids=n()) %>%
mutate(frac=ab10_alleles/(2*kids)) %>%
filter(frac>0) %>%
arrange(desc(frac)) %>%
data.frame()
dads
## GBSDad ab10_alleles kids frac
## 1 PC_N11_ID1 2 1 1.00000000
## 2 PC_J12_ID1 8 5 0.80000000
## 3 PC_J50_ID1 17 11 0.77272727
## 4 PC_I05_ID1 3 2 0.75000000
## 5 PC_N13_ID1 43 31 0.69354839
## 6 PC_M58_ID1 4 3 0.66666667
## 7 PC_L56_ID1 42 33 0.63636364
## 8 PC_N09_ID1 24 22 0.54545455
## 9 PC_I11_ID2 14 13 0.53846154
## 10 PC_K55_ID1 3 3 0.50000000
## 11 PC_N10_ID1 1 1 0.50000000
## 12 PC_L12_ID2 13 15 0.43333333
## 13 PC_J14_ID1 6 7 0.42857143
## 14 PC_M59_ID1 10 12 0.41666667
## 15 PC_J08_ID1 15 19 0.39473684
## 16 PC_I58_ID1 11 14 0.39285714
## 17 PC_N07_ID1 30 39 0.38461538
## 18 PC_M15_ID1 6 8 0.37500000
## 19 PC_I50_ID2 20 27 0.37037037
## 20 PC_L08_ID1 21 31 0.33870968
## 21 PC_I11_ID1 2 3 0.33333333
## 22 PC_I52_ID1 4 6 0.33333333
## 23 PC_O08_ID1 2 3 0.33333333
## 24 PC_I06_ID1 26 40 0.32500000
## 25 PC_K55_ID2 15 25 0.30000000
## 26 PC_N48_ID1 3 5 0.30000000
## 27 PC_L48_ID1 4 7 0.28571429
## 28 PC_O59_ID2 8 17 0.23529412
## 29 PC_I08_ID1 5 11 0.22727273
## 30 PC_J14_ID2 4 11 0.18181818
## 31 PC_J07_ID1 2 6 0.16666667
## 32 PC_K60_ID1 6 18 0.16666667
## 33 PC_J04_ID1 7 25 0.14000000
## 34 PC_L14_ID1 1 4 0.12500000
## 35 PC_N56_ID1 1 6 0.08333333
## 36 PC_N04_ID1 5 31 0.08064516
## 37 PC_M05_ID1 7 46 0.07608696
## 38 PC_L12_ID1 5 35 0.07142857
## 39 PC_N58_ID2 5 35 0.07142857
## 40 PC_N07_ID2 2 19 0.05263158
## 41 PC_N57_ID2 2 19 0.05263158
## 42 PC_K02_ID1 1 10 0.05000000
## 43 PC_N58_ID1 1 14 0.03571429
## 44 PC_L06_ID2 1 22 0.02272727
## 45 PC_J48_ID2 1 23 0.02173913
What’s the mean (weighted by kids) percent transmission of Ab10?
dads=mutate(dads,wfrac=frac*kids)
sum(dads$wfrac)/sum(dads$kids)
## [1] 0.2798103
Haven’t checked for AB10-1 vs 2. Top SNPs
Top markers for Ab10-1 vs 2.
filter(all_glm,marker_Rsq>0.45,marker_Rsq != "Inf",Trait=="type") %>% select(Marker) %>% unlist() %>% str_replace("S10_","")
## [1] "147572004" "147572004"
Get genotype data. Warning: this part not reproducible as data cannot be shared. Data is free to download at Seeds of Discovery website. Output as table to read in and check for appropriate SNPs
system("~/src/tassel-5-standalone/run_pipeline.pl -h ~/Projects/inversion_seeds/data/AllZeaGBSv2.7_SEED_Beagle4_chr10.hmp.txt -filterAlign -filterAlignStartPos 144000000 -filterAlignEndPos 151000000 -export seeds -exportType Table")
Find taxa with Ab10 SNPs. Going with S10_144679393 since that’s the best marker.
seed_genos<-data.frame(fread("seeds.txt"))
colnames(seed_genos)=sapply(seed_genos[1,],function(x) paste("x",x,sep=""))
seed_genos=seed_genos[-1,]
seeds_taxa=as.vector(sapply(seed_genos$xTaxa,function(x) strsplit(x,":")[[1]][1]))
seed_genos$xTaxa=seeds_taxa
#set Ab10 genotypes for S10_144679393
abgeno<-c("T","K")
ab10_seeds<-select(seed_genos,xTaxa,x144679393) %>% mutate(ab10=ifelse(x144679393 == "K","HET",ifelse(x144679393=="T","AB10","N10")))
head(ab10_seeds)
## xTaxa x144679393 ab10
## 1 SEEDGWAS1 G N10
## 2 SEEDGWAS10 G N10
## 3 SEEDGWAS1000 G N10
## 4 SEEDGWAS1001 G N10
## 5 SEEDGWAS1003 G N10
## 6 SEEDGWAS1004 G N10
Get SeeDs passport data. First get ID conversion.
download.file("https://gist.githubusercontent.com/rossibarra/74cb879eb8691a8302eb20cce7ef01b9/raw/37e97f5f58a40fab6863182790781a0525af4a08/gistfile1.txt","idlist.txt")
idlist<-read.table("idlist.txt",header=T)
head(idlist)
## Sample_ID GID
## 1 SEEDGWAS1 273
## 2 SEEDGWAS10 5509
## 3 SEEDGWAS1000 27004
## 4 SEEDGWAS1001 27012
## 5 SEEDGWAS1003 27086
## 6 SEEDGWAS1004 27110
Then main passport data. Merge assuming “general identifier” == “GID”. Not sure what “id” field is?
download.file("https://gist.githubusercontent.com/rossibarra/ae50aadb26418c22a6d7b17cfdd7aee6/raw/eca28582af70ee145f566df0b81d775fa77bad0a/gistfile1.txt","passport.txt")
pass<-read.table("passport.txt",header=T,sep="\t")
seeds_acc<-data.frame(merge(pass,idlist,by.x="general_identifier",by.y="GID"))
seeds_acc<-merge(seeds_acc,ab10_seeds,by.x="Sample_ID",by.y="xTaxa",all.x=TRUE)
head(seeds_acc)
## Sample_ID general_identifier id name bank_number
## 1 SEEDGWAS10 5509 195 5509 CIMMYTMA-BANK-000459
## 2 SEEDGWAS1000 27004 3491 27004 CIMMYTMA-BANK-017506
## 3 SEEDGWAS1001 27012 3499 27012 CIMMYTMA-BANK-017569
## 4 SEEDGWAS1003 27086 3542 27086 CIMMYTMA-BANK-017296
## 5 SEEDGWAS1004 27110 3553 27110 CIMMYTMA-BANK-018072
## 6 SEEDGWAS1006 27507 3652 27507 CIMMYTMA-BANK-021646
## collnumb colldate locations_latitude locations_longitude x144679393
## 1 VERA133 1/1/52 19.93863 -96.87067 G
## 2 SNLP162 21.83299 -99.63423 G
## 3 SNLP163 22.02337 -99.65377 G
## 4 YUCA34 1/1/48 20.48543 -89.73254 G
## 5 VERA240 1/1/43 20.96905 -97.38226 G
## 6 PAZM01044 2/24/98 -23.26798 -57.30910 G
## ab10
## 1 N10
## 2 N10
## 3 N10
## 4 N10
## 5 N10
## 6 N10
Get map
locale <- c(min(seeds_acc$locations_longitude)-2, min(seeds_acc$locations_latitude)-2, max(seeds_acc$locations_longitude)+2, max(seeds_acc$locations_latitude)+2)
myMap <- get_map(location=locale,source="stamen", maptype="watercolor",zoom=4)
## Map from URL : http://tile.stamen.com/watercolor/4/2/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/2/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/2/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/2/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/9.jpg
#myMap <- get_map(location=locale,source="google", maptype="satellite",zoom=3)
Plot
ggmap(myMap) +
geom_point(aes(x = locations_longitude, y = locations_latitude,color=ab10),alpha=1,size=0.5,data = filter(seeds_acc,ab10=="N10")) +
geom_point(aes(x = locations_longitude, y = locations_latitude,color=ab10),alpha=1,size=1,data = filter(seeds_acc,ab10 %in% c("AB10","HET"))) +
scale_color_manual(values=c("N10" = cbPalette[9], "HET" = cbPalette[2], "AB10" = cbPalette[7]))