Munge

Load stuff

## Warning: package 'cowplot' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.4
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, last
## Warning: package 'ggmap' was built under R version 3.2.3
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:cowplot':
## 
##     theme_nothing

Get phenotype, pedigree and genotype data

download.file("https://gist.githubusercontent.com/rossibarra/96ab9d5bc4c13fdcad3a4b189c6215c5/raw/dd69c989915ced1181479845c18da676e313e06e/ab10_phenos","ab10_phenos.txt")
download.file("https://ndownloader.figshare.com/files/5343103?private_link=b16f322f87fbfbcf114b","chr10_v2_gbs2.7_ab10.hmp.txt.gz")
download.file("https://gist.githubusercontent.com/rossibarra/e106872ae538feae6de4bd586b820317/raw/e1c36325395d726ad46a6407445e9ebe3d43a012/ab10_pedigrees","ab10_pedigree.txt")

GWAS

Using Tassel 5.2.16 to identify markers associated with the R1 locus (tightly linked to Ab10).

Impute

#Note to self: doesn't work beyond Mb148. Not sure why? Those SNPs filtered out?
system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g  -h chr10_v2_gbs2.7_ab10.hmp.txt.gz -filterAlign -filterAlignMinCount 10 -filterAlignMinFreq 0.01 -filterAlignMaxFreq 0.99 -FSFHapImputationPlugin -pedigrees ab10_pedigree.txt  -minHap 2 -windowLD true -phet 0.15 -bc false -maxMissing 0.6 -minMaf 0.01 -logfile logfile.txt  -endPLugin -export impute_chr10")

Join imputed datasets and do PCA

system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h impute_chr103.hmp.txt -fork2 -h impute_chr104.hmp.txt -combine3 -input1 -input2 -mergeGenotypeTables -filterAlign -filterAlignStartPos 1 -filterAlignEndPos 138000000 -PrincipalComponentsPlugin -covariance true -ncomponents 3 -endPlugin -export PCall10 -runfork1 -runfork2")

Filter for end, impute, export to new joined file

system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h impute_chr103.hmp.txt -fork2 -h impute_chr104.hmp.txt -combine3 -input1 -input2 -mergeGenotypeTables -filterAlign -filterAlignStartPos 138000000 -filterAlignEndPos 151000000 -runfork1 -runfork2 -export combined_end10")

Join PCA with phenotype file and genotype to run simple GLM with PC’s as covariates

system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h combined_end10.hmp.txt -fork2 -importGuess ab10_phenos.txt -fork3 -importGuess PCall101.txt -combineA -input1 -input2 -input3 -intersect -FixedEffectLMPlugin -saveToFile true -siteFile sites.txt -alleleFile alleles.txt -endPlugin -runfork1 -runfork2 -runfork3") 

Investigate GLM

sites_glm<-read.table("sites.txt",header=T)
alleles_glm<-read.table("alleles.txt",header=T)
all_glm=merge(sites_glm,alleles_glm,by=c("Marker","Trait")) %>% 
  mutate(Pos=Pos.x/1E6,Chr=Chr.x,p=-1*log10(p+10^-100)) %>% 
  select(-c(Chr.x,Pos.x,Chr.y,Pos.y))

Plot -log10 p-value for each trait, size is \(R^2\), red line is R1 locus

ggplot(all_glm,aes(x=Pos,y=p,color=Trait,size=marker_Rsq))+
   ylab("-log10 p")+xlab("Mb")+geom_point(alpha=0.5)+
  scale_color_manual(values=cbPalette[2:3])+
  geom_vline(xintercept=138.4, linetype="dashed", color = "red", size=1)
## Warning: Removed 176 rows containing missing values (geom_point).

Find marker with best \(R^2\), find out information on these alleles.

For R. Tons of markers. S10_147009651 C is N10, A is Ab10

filter(all_glm,marker_Rsq>0.5,marker_Rsq != "Inf",Trait=="R") %>% 
  select(Marker,Pos,p,marker_Rsq,Obs,Allele) %>% arrange(desc(marker_Rsq)) 
##           Marker      Pos        p marker_Rsq Obs Allele
## 1  S10_140958484 140.9585 12.67330  0.5534540  20      C
## 2  S10_140958484 140.9585 12.67330  0.5534540  31      Y
## 3  S10_140958484 140.9585 12.67330  0.5534540   8      T
## 4  S10_144679393 144.6794 12.07764  0.5466994  18      G
## 5  S10_144679393 144.6794 12.07764  0.5466994  31      K
## 6  S10_144679393 144.6794 12.07764  0.5466994   8      T
## 7  S10_144679394 144.6794 12.07764  0.5466994  18      T
## 8  S10_144679394 144.6794 12.07764  0.5466994  31      Y
## 9  S10_144679394 144.6794 12.07764  0.5466994   8      C
## 10 S10_144679414 144.6794 12.07764  0.5466994  18      G
## 11 S10_144679414 144.6794 12.07764  0.5466994  31      S
## 12 S10_144679414 144.6794 12.07764  0.5466994   8      C
## 13 S10_139877027 139.8770 24.27655  0.5422498  14      G
## 14 S10_139877027 139.8770 24.27655  0.5422498  15      T
## 15 S10_139877027 139.8770 24.27655  0.5422498  53      K
## 16 S10_141962838 141.9628 12.29636  0.5392099   8      G
## 17 S10_141962838 141.9628 12.29636  0.5392099  15      C
## 18 S10_141962838 141.9628 12.29636  0.5392099  31      R
## 19 S10_141962838 141.9628 12.29636  0.5392099   8      A
## 20 S10_143063621 143.0636 13.22369  0.5391788  23      C
## 21 S10_143063621 143.0636 13.22369  0.5391788  31      S
## 22 S10_143063621 143.0636 13.22369  0.5391788   8      G
## 23 S10_143247688 143.2477 13.22369  0.5391788  23      G
## 24 S10_143247688 143.2477 13.22369  0.5391788  31      R
## 25 S10_143247688 143.2477 13.22369  0.5391788   8      A
## 26 S10_143716234 143.7162 13.22369  0.5391788  23      A
## 27 S10_143716234 143.7162 13.22369  0.5391788  31      R
## 28 S10_143716234 143.7162 13.22369  0.5391788   8      G
## 29 S10_144312096 144.3121 13.22369  0.5391788  23      A
## 30 S10_144312096 144.3121 13.22369  0.5391788  31      R
## 31 S10_144312096 144.3121 13.22369  0.5391788   8      G
## 32 S10_144462515 144.4625 13.22369  0.5391788  23      C
## 33 S10_144462515 144.4625 13.22369  0.5391788  31      Y
## 34 S10_144462515 144.4625 13.22369  0.5391788   8      T
## 35 S10_145611012 145.6110 13.22369  0.5391788  23      G
## 36 S10_145611012 145.6110 13.22369  0.5391788  31      K
## 37 S10_145611012 145.6110 13.22369  0.5391788   8      T
## 38 S10_146569798 146.5698 13.22369  0.5391788  23      T
## 39 S10_146569798 146.5698 13.22369  0.5391788  31      K
## 40 S10_146569798 146.5698 13.22369  0.5391788   8      G
## 41 S10_146569799 146.5698 13.22369  0.5391788  23      C
## 42 S10_146569799 146.5698 13.22369  0.5391788  31      Y
## 43 S10_146569799 146.5698 13.22369  0.5391788   8      T
## 44 S10_147049501 147.0495 13.22369  0.5391788  23      C
## 45 S10_147049501 147.0495 13.22369  0.5391788  31      Y
## 46 S10_147049501 147.0495 13.22369  0.5391788   8      T
## 47 S10_147123126 147.1231 13.22369  0.5391788  23      A
## 48 S10_147123126 147.1231 13.22369  0.5391788  31      R
## 49 S10_147123126 147.1231 13.22369  0.5391788   8      G
## 50 S10_148410465 148.4105 13.22369  0.5391788  23      G
## 51 S10_148410465 148.4105 13.22369  0.5391788  31      R
## 52 S10_148410465 148.4105 13.22369  0.5391788   8      A
## 53 S10_148410466 148.4105 13.22369  0.5391788  23      C
## 54 S10_148410466 148.4105 13.22369  0.5391788  31      S
## 55 S10_148410466 148.4105 13.22369  0.5391788   8      G
## 56 S10_148410468 148.4105 13.22369  0.5391788  23      G
## 57 S10_148410468 148.4105 13.22369  0.5391788  31      K
## 58 S10_148410468 148.4105 13.22369  0.5391788   8      T
## 59 S10_148577979 148.5780 13.22369  0.5391788  23      C
## 60 S10_148577979 148.5780 13.22369  0.5391788  31      Y
## 61 S10_148577979 148.5780 13.22369  0.5391788   8      T
## 62 S10_148817784 148.8178 13.22369  0.5391788  23      T
## 63 S10_148817784 148.8178 13.22369  0.5391788  31      Y
## 64 S10_148817784 148.8178 13.22369  0.5391788   8      C
## 65 S10_149029683 149.0297 13.22369  0.5391788  23      G
## 66 S10_149029683 149.0297 13.22369  0.5391788  31      0
## 67 S10_149029683 149.0297 13.22369  0.5391788   8      -
## 68 S10_141500118 141.5001 10.87162  0.5297516  14      G
## 69 S10_141500118 141.5001 10.87162  0.5297516  31      S
## 70 S10_141500118 141.5001 10.87162  0.5297516   8      C
## 71 S10_142367974 142.3680 10.20304  0.5146448   8      A
## 72 S10_142367974 142.3680 10.20304  0.5146448  31      R
## 73 S10_142367974 142.3680 10.20304  0.5146448  15      G

For Ab10-I vs Ab10-II. In spite of below S10_141959097 is best. Ab10-2 has a G, all other (N10 and Ab10-I has an A)

filter(all_glm,marker_Rsq>0.4,marker_Rsq != "Inf",Trait=="type") %>% 
  select(Marker,Pos,p,marker_Rsq,Obs,Allele) %>% arrange(desc(marker_Rsq))
##           Marker      Pos         p marker_Rsq Obs Allele
## 1  S10_147572004 147.5720  2.378086  0.4620195  12      G
## 2  S10_147572004 147.5720  2.378086  0.4620195   1      -
## 3  S10_142365006 142.3650  9.456322  0.4225598  12      C
## 4  S10_142365006 142.3650  9.456322  0.4225598  15      A
## 5  S10_142365006 142.3650  9.456322  0.4225598  22      M
## 6  S10_142365008 142.3650  9.456322  0.4225598  12      A
## 7  S10_142365008 142.3650  9.456322  0.4225598  15      G
## 8  S10_142365008 142.3650  9.456322  0.4225598  22      R
## 9  S10_142365016 142.3650  9.456322  0.4225598  12      T
## 10 S10_142365016 142.3650  9.456322  0.4225598  15      A
## 11 S10_142365016 142.3650  9.456322  0.4225598  22      W
## 12 S10_148432972 148.4330  9.264924  0.4158316  10      T
## 13 S10_148432972 148.4330  9.264924  0.4158316  16      A
## 14 S10_148432972 148.4330  9.264924  0.4158316  22      W
## 15 S10_148432979 148.4330  9.264924  0.4158316  10      A
## 16 S10_148432979 148.4330  9.264924  0.4158316  16      T
## 17 S10_148432979 148.4330  9.264924  0.4158316  22      W
## 18 S10_138484548 138.4845  9.962950  0.4130784  15      G
## 19 S10_138484548 138.4845  9.962950  0.4130784  15      A
## 20 S10_138484548 138.4845  9.962950  0.4130784  22      R
## 21 S10_141959360 141.9594  8.271540  0.4111276   8      T
## 22 S10_141959360 141.9594  8.271540  0.4111276  31      0
## 23 S10_141959360 141.9594  8.271540  0.4111276   8      +
## 24 S10_141959361 141.9594  8.271540  0.4111276   8      G
## 25 S10_141959361 141.9594  8.271540  0.4111276  31      0
## 26 S10_141959361 141.9594  8.271540  0.4111276   8      +
## 27 S10_141959367 141.9594  8.271540  0.4111276   8      C
## 28 S10_141959367 141.9594  8.271540  0.4111276  31      0
## 29 S10_141959367 141.9594  8.271540  0.4111276   8      +
## 30 S10_141959369 141.9594  8.271540  0.4111276   8      A
## 31 S10_141959369 141.9594  8.271540  0.4111276  31      0
## 32 S10_141959369 141.9594  8.271540  0.4111276   8      +
## 33 S10_142331721 142.3317  8.271540  0.4111276   8      T
## 34 S10_142331721 142.3317  8.271540  0.4111276  31      Y
## 35 S10_142331721 142.3317  8.271540  0.4111276   8      C
## 36 S10_145284375 145.2844  8.271540  0.4111276   8      A
## 37 S10_145284375 145.2844  8.271540  0.4111276  31      M
## 38 S10_145284375 145.2844  8.271540  0.4111276   8      C
## 39 S10_145284381 145.2844  8.271540  0.4111276   8      C
## 40 S10_145284381 145.2844  8.271540  0.4111276  31      0
## 41 S10_145284381 145.2844  8.271540  0.4111276   8      -
## 42 S10_141835607 141.8356 10.533010  0.4052642  18      G
## 43 S10_141835607 141.8356 10.533010  0.4052642  15      T
## 44 S10_141835607 141.8356 10.533010  0.4052642  22      K
## 45 S10_141835646 141.8356 10.533010  0.4052642  18      G
## 46 S10_141835646 141.8356 10.533010  0.4052642  15      A
## 47 S10_141835646 141.8356 10.533010  0.4052642  22      R
## 48 S10_145279161 145.2792  9.887381  0.4038456  13      G
## 49 S10_145279161 145.2792  9.887381  0.4038456  15      A
## 50 S10_145279161 145.2792  9.887381  0.4038456  22      R
## 51 S10_141478608 141.4786 10.660575  0.4033787  10      A
## 52 S10_141478608 141.4786 10.660575  0.4033787  23      C
## 53 S10_141478608 141.4786 10.660575  0.4033787  22      M
## 54 S10_141478637 141.4786 10.660575  0.4033787  10      T
## 55 S10_141478637 141.4786 10.660575  0.4033787  23      C
## 56 S10_141478637 141.4786 10.660575  0.4033787  22      Y
## 57 S10_143716868 143.7169 10.660575  0.4033787  10      T
## 58 S10_143716868 143.7169 10.660575  0.4033787  23      C
## 59 S10_143716868 143.7169 10.660575  0.4033787  22      Y
## 60 S10_143716869 143.7169 10.660575  0.4033787  10      C
## 61 S10_143716869 143.7169 10.660575  0.4033787  23      -
## 62 S10_143716869 143.7169 10.660575  0.4033787  22      0
## 63 S10_143716917 143.7169 10.660575  0.4033787  10      C
## 64 S10_143716917 143.7169 10.660575  0.4033787  23      T
## 65 S10_143716917 143.7169 10.660575  0.4033787  22      Y
## 66 S10_144739272 144.7393 11.284108  0.4003194  21      G
## 67 S10_144739272 144.7393 11.284108  0.4003194  15      C
## 68 S10_144739272 144.7393 11.284108  0.4003194  22      S
## 69 S10_144739284 144.7393 11.284108  0.4003194  21      T
## 70 S10_144739284 144.7393 11.284108  0.4003194  15      C
## 71 S10_144739284 144.7393 11.284108  0.4003194  22      Y
## 72 S10_144739296 144.7393 11.284108  0.4003194  21      T
## 73 S10_144739296 144.7393 11.284108  0.4003194  15      A
## 74 S10_144739296 144.7393 11.284108  0.4003194  22      W

Inversion PCA

Join imputed datasets and do PCA

system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -fork1 -h combined_end10.hmp.txt -filterAlign -filterAlignStartPos 148000000 -filterAlignEndPos 149000000 -PrincipalComponentsPlugin -covariance true -ncomponents 3 -endPlugin  -export PCinvert -runfork1 ")

Plot. Shows 3 clusters suggesting inversion, and PC2 presumable separates Ab10-1 and Ab10-2. Maybe real difference, but may be due to fact they were imputed separately. Value of 3 here is N10

endinversion<-read.table("PCinvert1.txt",skip=2,header=T) %>% mutate(ab10type=ifelse(grepl("r",Taxa),3,ifelse(grepl("AB10II",Taxa),2,ifelse(grepl("AB10I",Taxa),1,ifelse(grepl("W23",Taxa),1,ifelse(grepl("AB20I",Taxa),1,99)))))) %>% mutate(ab10type=factor(ab10type))
ggplot(endinversion,aes(y=PC2,x=PC1,color=factor(ab10type)))+geom_point(,alpha=0.5,size=3) 

Check in Palmar Chico

Read in ch10 palmar chico

system("~/src/tassel-5-standalone/run_pipeline.pl -Xmx6g -h teo_chrom10.hmp.txt.gz -filterAlign -filterAlignStartPos 144000000 -filterAlignEndPos 151000000 -export palmarchico_end10 -exportType Table" )

Get data on mom/dad for each offspring:

download.file("https://gist.githubusercontent.com/rossibarra/494112881b623c80888606321dbe6364/raw/3e9045295f1304ad3ca4b64d5f354e939fa4c92b/mom_dad_ab10_palmar_chico","momdad.txt")
momdad<-read.table("momdad.txt",header=T)

Going with S10_144679393 since that’s the top marker. Get dad, modify taxa names, join with parentage data.

pchico<-data.frame(fread("palmarchico_end10.txt"))
colnames(pchico)=sapply(pchico[1,],function(x) paste("x",x,sep=""))
pchico=pchico[-1,]
taxa=as.vector(sapply(pchico$xTaxa,function(x) strsplit(x,":")[[1]][1]))
pchico_new=cbind(taxa,pchico) %>% 
  select(taxa,x144679393)

pchico_new$taxa=str_replace(pchico_new$taxa,"_[123]","") %>%
  str_replace("_mrg","")

palmarchico<-merge(pchico_new,momdad,by.x="taxa",by.y="Plot") %>%
  mutate(Ab10=ifelse(x144679393=="K",1,ifelse(x144679393=="T",2,0))) 

Calculate rates of transmission

Maternal in self progeny

moms=filter(palmarchico,x144679393!="N", Cross=="self") %>%
  group_by(GBSMom) %>% 
  summarize(ab10_alleles=sum(Ab10),kids=n()) %>% 
  mutate(frac=ab10_alleles/(2*kids)) %>% 
  filter(frac>0) %>%
  arrange(desc(frac)) %>%
  data.frame() 
moms
##        GBSMom ab10_alleles kids       frac
## 1  PC_J10_ID1            2    1 1.00000000
## 2  PC_J12_ID1           12    6 1.00000000
## 3  PC_J50_ID1           20   10 1.00000000
## 4  PC_L12_ID2           30   15 1.00000000
## 5  PC_N11_ID1            2    1 1.00000000
## 6  PC_N13_ID1           14    7 1.00000000
## 7  PC_N14_ID2            6    3 1.00000000
## 8  PC_I50_ID2           22   12 0.91666667
## 9  PC_I11_ID2           31   19 0.81578947
## 10 PC_M15_ID1           20   14 0.71428571
## 11 PC_N48_ID1           21   15 0.70000000
## 12 PC_O59_ID2           25   21 0.59523810
## 13 PC_N09_ID1            8    7 0.57142857
## 14 PC_L56_ID1           27   25 0.54000000
## 15 PC_I05_ID1            6    6 0.50000000
## 16 PC_I58_ID1            6    6 0.50000000
## 17 PC_I06_ID1            1    2 0.25000000
## 18 PC_L48_ID2            2   20 0.05000000
## 19 PC_N57_ID2            2   27 0.03703704

What’s the mean (weighted by kids) percent transmission of Ab10?

moms=filter(moms,kids>5) %>% mutate(wfrac=frac*kids) 
sum(moms$wfrac)/sum(moms$kids)
## [1] 0.5857143

Paternal in outcross progeny

dads=filter(palmarchico,x144679393!="N", Cross=="outcross") %>% 
  group_by(GBSDad) %>%
  summarize(ab10_alleles=sum(Ab10),kids=n()) %>% 
  mutate(frac=ab10_alleles/(2*kids)) %>% 
  filter(frac>0) %>%
  arrange(desc(frac)) %>% 
  data.frame() 
dads
##        GBSDad ab10_alleles kids       frac
## 1  PC_N11_ID1            2    1 1.00000000
## 2  PC_J12_ID1            8    5 0.80000000
## 3  PC_J50_ID1           17   11 0.77272727
## 4  PC_I05_ID1            3    2 0.75000000
## 5  PC_N13_ID1           43   31 0.69354839
## 6  PC_M58_ID1            4    3 0.66666667
## 7  PC_L56_ID1           42   33 0.63636364
## 8  PC_N09_ID1           24   22 0.54545455
## 9  PC_I11_ID2           14   13 0.53846154
## 10 PC_K55_ID1            3    3 0.50000000
## 11 PC_N10_ID1            1    1 0.50000000
## 12 PC_L12_ID2           13   15 0.43333333
## 13 PC_J14_ID1            6    7 0.42857143
## 14 PC_M59_ID1           10   12 0.41666667
## 15 PC_J08_ID1           15   19 0.39473684
## 16 PC_I58_ID1           11   14 0.39285714
## 17 PC_N07_ID1           30   39 0.38461538
## 18 PC_M15_ID1            6    8 0.37500000
## 19 PC_I50_ID2           20   27 0.37037037
## 20 PC_L08_ID1           21   31 0.33870968
## 21 PC_I11_ID1            2    3 0.33333333
## 22 PC_I52_ID1            4    6 0.33333333
## 23 PC_O08_ID1            2    3 0.33333333
## 24 PC_I06_ID1           26   40 0.32500000
## 25 PC_K55_ID2           15   25 0.30000000
## 26 PC_N48_ID1            3    5 0.30000000
## 27 PC_L48_ID1            4    7 0.28571429
## 28 PC_O59_ID2            8   17 0.23529412
## 29 PC_I08_ID1            5   11 0.22727273
## 30 PC_J14_ID2            4   11 0.18181818
## 31 PC_J07_ID1            2    6 0.16666667
## 32 PC_K60_ID1            6   18 0.16666667
## 33 PC_J04_ID1            7   25 0.14000000
## 34 PC_L14_ID1            1    4 0.12500000
## 35 PC_N56_ID1            1    6 0.08333333
## 36 PC_N04_ID1            5   31 0.08064516
## 37 PC_M05_ID1            7   46 0.07608696
## 38 PC_L12_ID1            5   35 0.07142857
## 39 PC_N58_ID2            5   35 0.07142857
## 40 PC_N07_ID2            2   19 0.05263158
## 41 PC_N57_ID2            2   19 0.05263158
## 42 PC_K02_ID1            1   10 0.05000000
## 43 PC_N58_ID1            1   14 0.03571429
## 44 PC_L06_ID2            1   22 0.02272727
## 45 PC_J48_ID2            1   23 0.02173913

What’s the mean (weighted by kids) percent transmission of Ab10?

dads=mutate(dads,wfrac=frac*kids) 
sum(dads$wfrac)/sum(dads$kids)
## [1] 0.2798103

Haven’t checked for AB10-1 vs 2. Top SNPs

Top markers for Ab10-1 vs 2.

filter(all_glm,marker_Rsq>0.45,marker_Rsq != "Inf",Trait=="type") %>% select(Marker) %>% unlist() %>% str_replace("S10_","") 
## [1] "147572004" "147572004"

Check in SeeDs

Get genotype data. Warning: this part not reproducible as data cannot be shared. Data is free to download at Seeds of Discovery website. Output as table to read in and check for appropriate SNPs

system("~/src/tassel-5-standalone/run_pipeline.pl -h ~/Projects/inversion_seeds/data/AllZeaGBSv2.7_SEED_Beagle4_chr10.hmp.txt  -filterAlign -filterAlignStartPos 144000000 -filterAlignEndPos 151000000 -export seeds -exportType Table")

Find taxa with Ab10 SNPs. Going with S10_144679393 since that’s the best marker.

seed_genos<-data.frame(fread("seeds.txt"))
colnames(seed_genos)=sapply(seed_genos[1,],function(x) paste("x",x,sep=""))
seed_genos=seed_genos[-1,]
seeds_taxa=as.vector(sapply(seed_genos$xTaxa,function(x) strsplit(x,":")[[1]][1]))
seed_genos$xTaxa=seeds_taxa
#set Ab10 genotypes for S10_144679393
abgeno<-c("T","K")
ab10_seeds<-select(seed_genos,xTaxa,x144679393) %>% mutate(ab10=ifelse(x144679393 == "K","HET",ifelse(x144679393=="T","AB10","N10")))
head(ab10_seeds)
##          xTaxa x144679393 ab10
## 1    SEEDGWAS1          G  N10
## 2   SEEDGWAS10          G  N10
## 3 SEEDGWAS1000          G  N10
## 4 SEEDGWAS1001          G  N10
## 5 SEEDGWAS1003          G  N10
## 6 SEEDGWAS1004          G  N10

Get SeeDs passport data. First get ID conversion.

download.file("https://gist.githubusercontent.com/rossibarra/74cb879eb8691a8302eb20cce7ef01b9/raw/37e97f5f58a40fab6863182790781a0525af4a08/gistfile1.txt","idlist.txt")
idlist<-read.table("idlist.txt",header=T)
head(idlist)
##      Sample_ID   GID
## 1    SEEDGWAS1   273
## 2   SEEDGWAS10  5509
## 3 SEEDGWAS1000 27004
## 4 SEEDGWAS1001 27012
## 5 SEEDGWAS1003 27086
## 6 SEEDGWAS1004 27110

Then main passport data. Merge assuming “general identifier” == “GID”. Not sure what “id” field is?

download.file("https://gist.githubusercontent.com/rossibarra/ae50aadb26418c22a6d7b17cfdd7aee6/raw/eca28582af70ee145f566df0b81d775fa77bad0a/gistfile1.txt","passport.txt")
pass<-read.table("passport.txt",header=T,sep="\t")
seeds_acc<-data.frame(merge(pass,idlist,by.x="general_identifier",by.y="GID"))
seeds_acc<-merge(seeds_acc,ab10_seeds,by.x="Sample_ID",by.y="xTaxa",all.x=TRUE)
head(seeds_acc)
##      Sample_ID general_identifier   id  name          bank_number
## 1   SEEDGWAS10               5509  195  5509 CIMMYTMA-BANK-000459
## 2 SEEDGWAS1000              27004 3491 27004 CIMMYTMA-BANK-017506
## 3 SEEDGWAS1001              27012 3499 27012 CIMMYTMA-BANK-017569
## 4 SEEDGWAS1003              27086 3542 27086 CIMMYTMA-BANK-017296
## 5 SEEDGWAS1004              27110 3553 27110 CIMMYTMA-BANK-018072
## 6 SEEDGWAS1006              27507 3652 27507 CIMMYTMA-BANK-021646
##    collnumb colldate locations_latitude locations_longitude x144679393
## 1   VERA133   1/1/52           19.93863           -96.87067          G
## 2   SNLP162                    21.83299           -99.63423          G
## 3   SNLP163                    22.02337           -99.65377          G
## 4    YUCA34   1/1/48           20.48543           -89.73254          G
## 5   VERA240   1/1/43           20.96905           -97.38226          G
## 6 PAZM01044  2/24/98          -23.26798           -57.30910          G
##   ab10
## 1  N10
## 2  N10
## 3  N10
## 4  N10
## 5  N10
## 6  N10

Map

Get map

locale <- c(min(seeds_acc$locations_longitude)-2, min(seeds_acc$locations_latitude)-2, max(seeds_acc$locations_longitude)+2, max(seeds_acc$locations_latitude)+2)
myMap <- get_map(location=locale,source="stamen", maptype="watercolor",zoom=4) 
## Map from URL : http://tile.stamen.com/watercolor/4/2/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/6.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/2/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/7.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/2/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/8.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/2/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/3/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/4/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/5/9.jpg
## Map from URL : http://tile.stamen.com/watercolor/4/6/9.jpg
#myMap <- get_map(location=locale,source="google", maptype="satellite",zoom=3) 

Plot

ggmap(myMap) + 
  geom_point(aes(x = locations_longitude, y = locations_latitude,color=ab10),alpha=1,size=0.5,data = filter(seeds_acc,ab10=="N10")) + 
  geom_point(aes(x = locations_longitude, y = locations_latitude,color=ab10),alpha=1,size=1,data = filter(seeds_acc,ab10 %in% c("AB10","HET"))) +
  scale_color_manual(values=c("N10" = cbPalette[9], "HET" = cbPalette[2], "AB10" = cbPalette[7]))