Analysis of Population Structure in 24 Populations using genomic SSR data

Development of robust genomic simple sequence repeat markers for estimation of genetic diversity within and among bulb onion (Allium cepa L.) populations

See http://link.springer.com/article/10.1007%2Fs11032-012-9727-6

library(adegenet)
## Loading required package: ade4
##    ==========================
##     adegenet 1.4-2 is loaded
##    ==========================
## 
##  - to start, type '?adegenet'
##  - to browse adegenet website, type 'adegenetWeb()'
##  - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
# Import the Genepop-format genotype file; the conversion message below
# reports that it is turned into a genind object.
GFHS <- read.genepop("GFHSApril2011no145.gen")
## 
##  Converting data from a Genepop .gen file to a genind object... 
## 
## 
## File description:  Title line:"Onion diversity2011" 
## 
## ...done.
# Print the genind summary for GFHS: sample sizes, allele counts per locus and
# per population, percentage of missing data, observed/expected heterozygosity.
summary(GFHS)
## 
##  # Total number of genotypes:  276 
## 
##  # Population sample sizes:  
##            P12          W202A            P34      Nasik Red        3000163 
##             12             12             12             12             12 
##        3000181        3000187        3000161        3000172        3000152 
##             12             12             12             12             12 
##        3000153        3000156        3000192        3000148        3000112 
##             12             12             12             12             12 
##        3000143  VioletdeGalmi Faridpuri self        3000154        3000149 
##             12             12             12             12             12 
##        3000159        3000164        3000110 
##             12             12             12 
## 
##  # Number of alleles per locus:  
## L01 L02 L03 L04 L05 L06 L07 L08 L09 L10 L11 L12 L13 L14 L15 L16 L17 L18 
##  20   8   5   8   4   6   4  17  12  13   4   4   6   6   7   5   6   4 
## L19 L20 
##  14   5 
## 
##  # Number of alleles per population:  
## 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 
## 54 33 48 43 59 65 64 47 46 70 57 49 49 45 56 45 52 41 54 53 60 58 47 
## 
##  # Percentage of missing data:  
## [1] 8.75
## 
##  # Observed heterozygosity:  
##     L01     L02     L03     L04     L05     L06     L07     L08     L09 
## 0.48018 0.28980 0.21484 0.19912 0.29545 0.32482 0.22015 0.40226 0.56855 
##     L10     L11     L12     L13     L14     L15     L16     L17     L18 
## 0.48855 0.19024 0.18889 0.19763 0.24336 0.35688 0.26872 0.02335 0.26296 
##     L19     L20 
## 0.31349 0.15074 
## 
##  # Expected heterozygosity:  
##    L01    L02    L03    L04    L05    L06    L07    L08    L09    L10 
## 0.8405 0.5757 0.3343 0.3432 0.5149 0.4760 0.3339 0.7210 0.7805 0.6966 
##    L11    L12    L13    L14    L15    L16    L17    L18    L19    L20 
## 0.2219 0.2313 0.2785 0.5161 0.5106 0.3700 0.6411 0.3759 0.8039 0.2614

Test homogeneity of variances between expected and observed heterozygosity (Bartlett's test)

# Keep the summary object so its per-locus Hexp / Hobs vectors can be reused
# by the tests that follow.
summaryGFHS <- summary(GFHS)
## 
##  # Total number of genotypes:  276 
## 
##  # Population sample sizes:  
##            P12          W202A            P34      Nasik Red        3000163 
##             12             12             12             12             12 
##        3000181        3000187        3000161        3000172        3000152 
##             12             12             12             12             12 
##        3000153        3000156        3000192        3000148        3000112 
##             12             12             12             12             12 
##        3000143  VioletdeGalmi Faridpuri self        3000154        3000149 
##             12             12             12             12             12 
##        3000159        3000164        3000110 
##             12             12             12 
## 
##  # Number of alleles per locus:  
## L01 L02 L03 L04 L05 L06 L07 L08 L09 L10 L11 L12 L13 L14 L15 L16 L17 L18 
##  20   8   5   8   4   6   4  17  12  13   4   4   6   6   7   5   6   4 
## L19 L20 
##  14   5 
## 
##  # Number of alleles per population:  
## 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 
## 54 33 48 43 59 65 64 47 46 70 57 49 49 45 56 45 52 41 54 53 60 58 47 
## 
##  # Percentage of missing data:  
## [1] 8.75
## 
##  # Observed heterozygosity:  
##     L01     L02     L03     L04     L05     L06     L07     L08     L09 
## 0.48018 0.28980 0.21484 0.19912 0.29545 0.32482 0.22015 0.40226 0.56855 
##     L10     L11     L12     L13     L14     L15     L16     L17     L18 
## 0.48855 0.19024 0.18889 0.19763 0.24336 0.35688 0.26872 0.02335 0.26296 
##     L19     L20 
## 0.31349 0.15074 
## 
##  # Expected heterozygosity:  
##    L01    L02    L03    L04    L05    L06    L07    L08    L09    L10 
## 0.8405 0.5757 0.3343 0.3432 0.5149 0.4760 0.3339 0.7210 0.7805 0.6966 
##    L11    L12    L13    L14    L15    L16    L17    L18    L19    L20 
## 0.2219 0.2313 0.2785 0.5161 0.5106 0.3700 0.6411 0.3759 0.8039 0.2614
# Compare the variances of the per-locus expected and observed heterozygosity.
bartlett.test(list(summaryGFHS$Hexp, summaryGFHS$Hobs))
## 
##  Bartlett test of homogeneity of variances
## 
## data:  list(summaryGFHS$Hexp, summaryGFHS$Hobs)
## Bartlett's K-squared = 3.611, df = 1, p-value = 0.0574

Perform a paired t-test of observed vs. expected heterozygosity

# One-sided paired t-test across loci: is expected heterozygosity greater than
# observed heterozygosity?
# Argument names are spelled out in full (no partial matching) and TRUE is used
# instead of the reassignable T. `var.equal` is omitted because a paired test
# works on the per-locus differences, so that argument has no effect.
t.test(
  summaryGFHS$Hexp,
  summaryGFHS$Hobs,
  paired = TRUE,
  alternative = "greater"
)
## 
##  Paired t-test
## 
## data:  summaryGFHS$Hexp and summaryGFHS$Hobs
## t = 6.2, df = 19, p-value = 2.947e-06
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.1495    Inf
## sample estimates:
## mean of the differences 
##                  0.2074

Compute F-statistics

# F-statistics for GFHS; as the messages below show, this call loads the
# hierfstat package on demand.
# NOTE(review): fstat() is believed to be absent from later adegenet releases
# (functionality moved to hierfstat) -- confirm before upgrading adegenet.
fstat(GFHS)
## Loading required package: hierfstat
## Warning: package 'hierfstat' was built under R version 3.1.1
## Loading required package: gtools
## 
## Attaching package: 'hierfstat'
## 
## The following object is masked from 'package:adegenet':
## 
##     read.fstat
##          pop    Ind
## Total 0.2648 0.4299
## pop   0.0000 0.2245

High within-population variation, as expected.


Run G-statistic Tests of Population Structuring

# Monte-Carlo G-statistic test of population structure using 99 permutations;
# the result is stored and then printed.
gtest <- gstat.randtest(x = GFHS, nsim = 99)
print(gtest)
## Monte-Carlo test
## Call: gstat.randtest(x = GFHS, nsim = 99)
## 
## Observation: 2490 
## 
## Based on 99 replicates
## Simulated p-value: 0.01 
## Alternative hypothesis: greater 
## 
##     Std.Obs Expectation    Variance 
##       8.712    1797.718    6307.578

Cluster with find.clusters

Supervised clustering showed an asymptote at ~80 PCs and ~12 clusters; these values are used below.

# Cluster individuals and run DAPC on the inferred groups.
# - A fixed seed makes the k-means step inside find.clusters() repeatable, so
#   the group assignment and the scatter plot can be regenerated.
# - The generics find.clusters() / dapc() are called instead of the
#   *.genind methods directly; dispatch on the genind object picks the method.
# - `choose.n.clust` is spelled out rather than relying on partial matching of
#   `choose`; with n.clust fixed at 12 no interactive choice is requested.
set.seed(1)
grp.BIC <- find.clusters(
  GFHS,
  n.pca = 80,
  n.clust = 12,
  choose.n.clust = FALSE
)

# DAPC on the inferred groups, retaining 80 PCs and 4 discriminant functions.
dapc1 <- dapc(GFHS, grp.BIC$grp, n.pca = 80, n.da = 4)
scatter(dapc1)

plot of chunk unnamed-chunk-6