Load libraries

library(ade4)
library(adegenet)
## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
library(here)
## here() starts at /gpfs/gibbs/pi/caccone/mkc54/albo
library(dartR) #to convert genind object
## Loading required package: ggplot2
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: dartR.data
## Registered S3 method overwritten by 'pegas':
##   method      from
##   print.amova ade4
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## Registered S3 method overwritten by 'genetics':
##   method      from 
##   [.haplotype pegas
## **** Welcome to dartR [Version 2.9.7 ] ****
## Be aware that owing to CRAN requirements and compatibility reasons not all functions of the package may run after the basic installation, as some packages could still be missing. Hence for a most enjoyable experience we recommend to run the function
## gl.install.vanilla.dartR()
## This installs all missing and required packages for your version of dartR. In case something fails during installation please refer to this tutorial: https://github.com/green-striped-gecko/dartR/wiki/Installation-tutorial.
## 
## For information how to cite dartR, please use:
## citation('dartR')
## Global verbosity is set to: 2
## 
## **** Have fun using dartR! ****
library(Cairo)

European dataset

module load PLINK/1.9b_6.21-x86_64

1. Create data file for Set 2 (r2<0.1) for Europe

# Subset the r2 < 0.1 SNP set to the families listed in pops_4fst.txt and
# write the result as a new binary fileset under output/dapc/.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
  --bfile output/snps_sets/r2_0.1 \
  --keep-fam output/fst/pops_4fst.txt \
  --allow-extra-chr \
  --keep-allele-order \
  --make-bed \
  --out output/dapc/dapc \
  --silent
# Pull the sample/variant summary lines out of the PLINK log.
grep -E 'samples|variants|remaining' output/dapc/dapc.log

47484 variants loaded from .bim file. --keep-fam: 407 people remaining. Total genotyping rate in remaining samples is 0.97243. 47484 variants and 407 people pass filters and QC.

Convert to raw format

# Recode the DAPC fileset into PLINK's .raw format (output/dapc/dapc.raw),
# which is the file read into R in the next step.
plink \
  --bfile output/dapc/dapc \
  --allow-extra-chr \
  --keep-allele-order \
  --recodeA \
  --out output/dapc/dapc \
  --silent
# Pull the sample/variant summary lines out of the PLINK log.
grep -E 'samples|variants|remaining' output/dapc/dapc.log

47484 variants loaded from .bim file. 47484 variants and 407 people pass filters and QC.

Import the data and convert it to genind format

# import the data
# The path is built relative to the here() project root
# (/gpfs/gibbs/pi/caccone/mkc54/albo), like the .rds paths further down,
# instead of wrapping an absolute path in here().
albo <-
  read.PLINK(
    here("europe", "output", "dapc", "dapc.raw"),
    quiet = FALSE,
    chunkSize = 1000,
    # Read in parallel only when the 'parallel' package is installed;
    # requireNamespace() checks this without attaching the package.
    parallel = requireNamespace("parallel", quietly = TRUE),
    n.cores = 4
  )
## 
##  Reading PLINK raw format into a genlight object...
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
# convert to genind
# probar = FALSE: the text progress bar would otherwise be written frame by
# frame into the rendered report.
albo1 <- gl2gi(albo, probar = FALSE, verbose = NULL)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save the genind object

# Write the converted genind object to disk so it can be reloaded later
# without repeating the import and conversion.
saveRDS(
  object = albo1,
  file = here("scripts", "RMarkdowns", "output", "europe", "dapc", "albo1.rds")
)

Load the genind object

# Read the saved genind object back in and print its summary.
albo1 <- readRDS(
  file = here("scripts", "RMarkdowns", "output", "europe", "dapc", "albo1.rds")
)

albo1
## /// GENIND OBJECT /////////
## 
##  // 407 individuals; 47,484 loci; 94,968 alleles; size: 174.1 Mb
## 
##  // Basic content
##    @tab:  407 x 94968 matrix of allele counts
##    @loc.n.all: number of alleles per locus (range: 2-2)
##    @loc.fac: locus factor for the 94968 columns of @tab
##    @all.names: list of allele names for each locus
##    @ploidy: ploidy of each individual  (range: 2-2)
##    @type:  codom
##    @call: df2genind(X = xx[, ], sep = "/", ncode = 1, ind.names = x@ind.names, 
##     pop = x@pop, NA.char = "-", ploidy = 2)
## 
##  // Optional content
##    @pop: population of each individual (group size range: 4-16)
##    @other: a list containing: sex  phenotype  pat  mat

2. DAPC for Europe

Find clusters

# Identify clusters with find.clusters(), considering at most 40 clusters.
# The call prompts interactively; the answers given for this run were:
#   PCs retained:                400
#   number of clusters (>= 2):   3
grp <- find.clusters(x = albo1, max.n.clust = 40)

Save it

# Cache the clustering result on disk.
saveRDS(
  object = grp,
  file = here("scripts/RMarkdowns/output/europe/dapc/grp.rds")
)

To load it

# Restore the cached clustering result and list its components.
grp <- readRDS(file = here("scripts/RMarkdowns/output/europe/dapc/grp.rds"))
names(grp)
## [1] "Kstat" "stat"  "grp"   "size"
# Number of individuals in each of our 3 clusters
grp[["size"]]
## [1] 235  66 106
# Cross-tabulate sampling population (rows) against inferred cluster (columns)
table(pop(albo1), grp[["grp"]])
##      
##        1  2  3
##   ALD 10  0  0
##   ALU  0  0 12
##   ALV 12  0  0
##   ARM  0  0 10
##   BAR 12  0  0
##   BRE  0 13  0
##   BUL 10  0  0
##   CES  0 14  0
##   CRO 12  0  0
##   DES  0 16  0
##   FRS 12  0  0
##   GES  0  0 12
##   GRA 11  0  0
##   GRC 10  0  0
##   IMP  4  0  0
##   ITB  3  2  0
##   ITP  8  0  0
##   ITR 12  0  0
##   KER  0  0 12
##   KRA  0  0 12
##   MAL 12  0  0
##   POP 10  2  0
##   RAR  0  0 12
##   ROM  4  0  0
##   ROS 11  0  0
##   SER  4  0  0
##   SEV  0  0 12
##   SIC  9  0  0
##   SLO 12  0  0
##   SOC  0  0 12
##   SPB  0  8  0
##   SPC  0  6  0
##   SPM  2  3  0
##   SPS  8  0  0
##   STS 12  0  0
##   TIK  0  0 12
##   TIR  4  0  0
##   TRE 10  2  0
##   TUA  9  0  0
##   TUH 12  0  0
# Visualise the population-by-cluster contingency table.
# The label counts are derived from the table itself instead of being
# hard-coded (previously 1:3 and 1:40), so the call stays valid if the number
# of clusters or populations changes.
pop_by_cluster <- table(pop(albo1), grp$grp)
table.value(
  pop_by_cluster,
  col.lab = paste("inf", seq_len(ncol(pop_by_cluster))), # inferred groups
  row.lab = paste("ori", seq_len(nrow(pop_by_cluster)))  # original groups
)

# DAPC using the inferred clusters as groups. n.pca and n.da are not supplied
# in the call; the printed object further down reports 400 PCs and
# 2 discriminant functions.
dapc1 <- dapc(x = albo1, pop = grp$grp)

Save it

# Cache the first DAPC fit on disk.
saveRDS(
  object = dapc1,
  file = here("scripts/RMarkdowns/output/europe/dapc/dapc1.rds")
)

To load it

# Restore the cached DAPC fit and print its summary.
dapc1 <- readRDS(
  file = here("scripts/RMarkdowns/output/europe/dapc/dapc1.rds")
)
dapc1
##  #################################################
##  # Discriminant Analysis of Principal Components #
##  #################################################
## class: dapc
## $call: dapc.genind(x = albo1, pop = grp$grp)
## 
## $n.pca: 400 first PCs of PCA used
## $n.da: 2 discriminant functions saved
## $var (proportion of conserved variance): 0.998
## 
## $eig (eigenvalues): 74050000 2101000  vector    length content                   
## 1 $eig      2      eigenvalues               
## 2 $grp      407    prior group assignment    
## 3 $prior    3      prior group probabilities 
## 4 $assign   407    posterior group assignment
## 5 $pca.cent 94968  centring vector of PCA    
## 6 $pca.norm 94968  scaling vector of PCA     
## 7 $pca.eig  406    eigenvalues of PCA        
## 
##   data.frame    nrow  ncol content                                          
## 1 $tab          407   400  retained PCs of PCA                              
## 2 $means        3     400  group means                                      
## 3 $loadings     400   2    loadings of variables                            
## 4 $ind.coord    407   2    coordinates of individuals (principal components)
## 5 $grp.coord    3     2    coordinates of groups                            
## 6 $posterior    407   3    posterior membership probabilities               
## 7 $pca.loadings 94968 400  PCA loadings of original variables               
## 8 $var.contr    94968 2    contribution of original variables
# Default scatter plot of the first DAPC (groups = inferred clusters)
scatter(x = dapc1)

Calculating the optimum PC number to rerun DAPC

# Search for the number of retained PCs that gives the best a-score for dapc1.
# seq_len() replaces 1:ncol(...) so the candidate range can never be c(1, 0).
# NOTE(review): no seed is set; presumably the simulated a-scores (and thus
# $best) vary between runs — confirm before reusing a value from another run.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 10
) # calculating optimal number of PCs

## $pop.score
## $pop.score$`1`
##         1         2         3 
## 0.0000000 0.6212121 0.9528302 
## 
## $pop.score$`50`
##         1         2         3 
## 0.1034043 0.7545455 0.7226415 
## 
## $pop.score$`100`
##         1         2         3 
## 0.1565957 0.5227273 0.4962264 
## 
## $pop.score$`150`
##         1         2         3 
## 0.1429787 0.3712121 0.3962264 
## 
## $pop.score$`200`
##          1          2          3 
## 0.09957447 0.23333333 0.23867925 
## 
## $pop.score$`250`
##          1          2          3 
## 0.06468085 0.17878788 0.15754717 
## 
## $pop.score$`300`
##          1          2          3 
## 0.03957447 0.09393939 0.09245283 
## 
## $pop.score$`350`
##          1          2          3 
## 0.01446809 0.05757576 0.04056604 
## 
## $pop.score$`400`
##           1           2           3 
## 0.002553191 0.003030303 0.004716981 
## 
## 
## $mean
##           1          50         100         150         200         250 
## 0.524680770 0.526863740 0.391849811 0.303472420 0.190529016 0.133671967 
##         300         350         400 
## 0.075322231 0.037536627 0.003433492 
## 
## $pred
## $pred$x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400
## 
## $pred$y
##   [1] 0.540436866 0.539771052 0.539104713 0.538437271 0.537768147 0.537096766
##   [7] 0.536422548 0.535744917 0.535063295 0.534377105 0.533685768 0.532988708
##  [13] 0.532285347 0.531575106 0.530857410 0.530131679 0.529397337 0.528653806
##  [19] 0.527900509 0.527136867 0.526362304 0.525576241 0.524778102 0.523967308
##  [25] 0.523143283 0.522305448 0.521453226 0.520586039 0.519703310 0.518804462
##  [31] 0.517888916 0.516956096 0.516005423 0.515036320 0.514048210 0.513040515
##  [37] 0.512012658 0.510964060 0.509894144 0.508802334 0.507688050 0.506550716
##  [43] 0.505389755 0.504204587 0.502994637 0.501759326 0.500498078 0.499210313
##  [49] 0.497895455 0.496552927 0.495182335 0.493784028 0.492358539 0.490906401
##  [55] 0.489428147 0.487924311 0.486395425 0.484842022 0.483264637 0.481663801
##  [61] 0.480040048 0.478393912 0.476725924 0.475036620 0.473326530 0.471596190
##  [67] 0.469846131 0.468076888 0.466288992 0.464482978 0.462659378 0.460818726
##  [73] 0.458961554 0.457088396 0.455199785 0.453296255 0.451378337 0.449446566
##  [79] 0.447501474 0.445543595 0.443573462 0.441591607 0.439598565 0.437594868
##  [85] 0.435581049 0.433557642 0.431525179 0.429484194 0.427435220 0.425378790
##  [91] 0.423315437 0.421245694 0.419170095 0.417089172 0.415003459 0.412913489
##  [97] 0.410819795 0.408722910 0.406623367 0.404521699 0.402418363 0.400313503
## [103] 0.398207187 0.396099484 0.393990461 0.391880186 0.389768727 0.387656152
## [109] 0.385542530 0.383427927 0.381312412 0.379196053 0.377078917 0.374961073
## [115] 0.372842589 0.370723532 0.368603971 0.366483973 0.364363606 0.362242939
## [121] 0.360122039 0.358000973 0.355879811 0.353758620 0.351637468 0.349516423
## [127] 0.347395552 0.345274924 0.343154607 0.341034668 0.338915176 0.336796198
## [133] 0.334677802 0.332560057 0.330443030 0.328326790 0.326211403 0.324096938
## [139] 0.321983463 0.319871047 0.317759756 0.315649658 0.313540823 0.311433317
## [145] 0.309327208 0.307222565 0.305119456 0.303017948 0.300918109 0.298820007
## [151] 0.296723739 0.294629516 0.292537576 0.290448159 0.288361505 0.286277853
## [157] 0.284197442 0.282120511 0.280047300 0.277978048 0.275912995 0.273852379
## [163] 0.271796441 0.269745419 0.267699552 0.265659081 0.263624244 0.261595281
## [169] 0.259572431 0.257555933 0.255546027 0.253542952 0.251546948 0.249558253
## [175] 0.247577107 0.245603750 0.243638420 0.241681358 0.239732802 0.237792991
## [181] 0.235862166 0.233940564 0.232028427 0.230125992 0.228233500 0.226351190
## [187] 0.224479300 0.222618071 0.220767742 0.218928551 0.217100739 0.215284544
## [193] 0.213480207 0.211687965 0.209908060 0.208140729 0.206386213 0.204644750
## [199] 0.202916580 0.201201942 0.199501011 0.197813700 0.196139857 0.194479331
## [205] 0.192831971 0.191197623 0.189576138 0.187967362 0.186371146 0.184787336
## [211] 0.183215782 0.181656332 0.180108834 0.178573136 0.177049087 0.175536536
## [217] 0.174035330 0.172545319 0.171066349 0.169598271 0.168140932 0.166694180
## [223] 0.165257865 0.163831833 0.162415935 0.161010018 0.159613930 0.158227520
## [229] 0.156850636 0.155483127 0.154124841 0.152775626 0.151435331 0.150103804
## [235] 0.148780894 0.147466448 0.146160316 0.144862346 0.143572385 0.142290284
## [241] 0.141015888 0.139749049 0.138489612 0.137237428 0.135992344 0.134754208
## [247] 0.133522870 0.132298177 0.131079978 0.129868122 0.128662479 0.127463015
## [253] 0.126269717 0.125082575 0.123901575 0.122726706 0.121557956 0.120395313
## [259] 0.119238765 0.118088300 0.116943906 0.115805572 0.114673284 0.113547032
## [265] 0.112426803 0.111312586 0.110204368 0.109102137 0.108005882 0.106915591
## [271] 0.105831251 0.104752851 0.103680379 0.102613822 0.101553169 0.100498408
## [277] 0.099449528 0.098406515 0.097369358 0.096338045 0.095312565 0.094292904
## [283] 0.093279052 0.092270996 0.091268725 0.090272226 0.089281487 0.088296497
## [289] 0.087317243 0.086343714 0.085375898 0.084413782 0.083457355 0.082506605
## [295] 0.081561520 0.080622088 0.079688296 0.078760134 0.077837589 0.076920649
## [301] 0.076009292 0.075103458 0.074203075 0.073308074 0.072418383 0.071533932
## [307] 0.070654650 0.069780466 0.068911310 0.068047111 0.067187798 0.066333301
## [313] 0.065483548 0.064638470 0.063797996 0.062962054 0.062130575 0.061303487
## [319] 0.060480720 0.059662203 0.058847865 0.058037636 0.057231445 0.056429222
## [325] 0.055630895 0.054836395 0.054045649 0.053258589 0.052475142 0.051695238
## [331] 0.050918808 0.050145778 0.049376081 0.048609643 0.047846396 0.047086267
## [337] 0.046329188 0.045575085 0.044823890 0.044075532 0.043329939 0.042587041
## [343] 0.041846767 0.041109047 0.040373810 0.039640986 0.038910502 0.038182290
## [349] 0.037456278 0.036732396 0.036010577 0.035290776 0.034572951 0.033857062
## [355] 0.033143067 0.032430924 0.031720593 0.031012033 0.030305201 0.029600058
## [361] 0.028896562 0.028194671 0.027494344 0.026795541 0.026098220 0.025402339
## [367] 0.024707858 0.024014735 0.023322930 0.022632401 0.021943106 0.021255005
## [373] 0.020568056 0.019882218 0.019197450 0.018513711 0.017830960 0.017149155
## [379] 0.016468255 0.015788219 0.015109005 0.014430573 0.013752882 0.013075889
## [385] 0.012399555 0.011723837 0.011048694 0.010374086 0.009699971 0.009026308
## [391] 0.008353055 0.007680172 0.007007618 0.006335350 0.005663328 0.004991511
## [397] 0.004319857 0.003648325 0.002976875 0.002305464
## 
## 
## $best
## [1] 1
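#Optimal number = 31 (NOTE: the output printed above reports $best = 1; this value presumably comes from a different run -- confirm before relying on it)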

Rerun DAPC with optimum PCs (31)

# Colour palette used for the groups in the DAPC scatter plots.
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#FFED7F",
  "#6a8fe0", "#FDCDAC", "#8c61cd", "#f365e7", "#871550", "#f6c8de",
  "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FFFF33", "#FF7F00",
  "#2524f9", "#cddb9b", "#799d10", "#CCCCCC", "#B3E2CD", "#a7e831",
  "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b",
  "#ead624", "#6A3D9A", "#FDC086", "#21a708", "#332288", "#51f310",
  "#9d8d88", "#66C2A5"
)

# The palette above has 38 entries, fewer than the number of populations in
# albo1. scatter() recycles `col` when it is shorter than the number of
# groups, so the surplus populations would reuse the first colours. Top the
# palette up so every population gets its own colour; the existing colours
# keep their positions.
n_groups <- nlevels(pop(albo1))
if (length(myCol) < n_groups) {
  myCol <- c(
    myCol,
    grDevices::hcl.colors(n_groups - length(myCol), palette = "Dark 3")
  )
}

# No `pop` argument is given, so presumably the groups default to pop(albo1)
# (the sampling populations) rather than the inferred clusters — confirm.
dapc2 <- dapc(albo1, var.contrib = TRUE, scale = FALSE, n.pca = 31, n.da = 9) #change PCs to the optimal

# Shrink text for this plot only; the previous settings are restored afterwards.
op <- par(cex = 0.65)
scatter(
  dapc2,
  pch = 20, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomright", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "bottomleft"
)
par(op)

# Each eigenvalue of dapc2 as a percentage of their sum, rounded to 2 decimals.
eig_percent <- round(dapc2$eig / sum(dapc2$eig) * 100, digits = 2)
eig_percent #35.15 19.07 11.59  6.87  5.12  4.56  3.49  2.93  2.30  1.51  1.41  1.30  0.94  0.80 0.63  0.50  0.40  0.29  0.26 0.25  0.15  0.12  0.10  0.08  0.05  0.05  0.04  0.02  0.02  0.01  0.00
##  [1] 35.15 19.07 11.59  6.87  5.12  4.56  3.49  2.93  2.30  1.51  1.41  1.30
## [13]  0.94  0.80  0.63  0.50  0.40  0.29  0.26  0.25  0.15  0.12  0.10  0.08
## [25]  0.05  0.05  0.04  0.02  0.02  0.01  0.00

Changing symbols for localities

# Plotting symbols 1-25, passed as `pch` so localities differ by shape as well
# as colour.
# NOTE(review): there are fewer symbols than populations, so presumably
# scatter() recycles them — confirm the colour/shape pairs remain unique.
good.shapes <- 1:25

# Shrink text for this plot only; the previous settings are restored afterwards.
op <- par(cex = 0.65)
scatter(
  dapc2,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomright", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "bottomleft"
)
par(op)

3. Create data file for Set 3 (r2<0.01 & MAF<0.01) for Europe

# Restrict the r2_0.01_b SNP set to the families listed in pops_4fst.txt and
# write the result as a new binary fileset under output/dapc/.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
  --bfile output/snps_sets/r2_0.01_b \
  --keep-fam output/fst/pops_4fst.txt \
  --allow-extra-chr \
  --keep-allele-order \
  --make-bed \
  --out output/dapc/dapc_01_b \
  --silent
# Pull the sample/variant summary lines out of the PLINK log.
grep 'samples\|variants\|remaining' output/dapc/dapc_01_b.log

20968 variants loaded from .bim file. --keep-fam: 408 people remaining. Total genotyping rate in remaining samples is 0.971028. 20968 variants and 408 people pass filters and QC.

Convert to raw format

# Recode the subsetted fileset with --recodeA; the resulting
# output/dapc/dapc_01_b.raw file is what the R import step reads.
# Paths are relative, so this must run from the europe/ directory.
plink \
  --bfile output/dapc/dapc_01_b \
  --allow-extra-chr \
  --keep-allele-order \
  --recodeA \
  --out output/dapc/dapc_01_b \
  --silent
# Pull the sample/variant summary lines out of the PLINK log.
grep 'samples\|variants\|remaining' output/dapc/dapc_01_b.log

20968 variants loaded from .bim file. 20968 variants and 408 people pass filters and QC.

Import the data and convert it to genind format

# Import the PLINK .raw file for Set 3 as a genlight object.
# The path is built relative to the project root reported by here()
# (/gpfs/gibbs/pi/caccone/mkc54/albo) instead of wrapping a hard-coded
# absolute cluster path in here(); it resolves to the same file.
albo <-
  read.PLINK(
    file = here("europe", "output", "dapc", "dapc_01_b.raw"),
    quiet = FALSE,
    chunkSize = 1000,
    # Only the logical value is needed here, so test for the package without
    # attaching it.
    parallel = requireNamespace("parallel", quietly = TRUE),
    n.cores = 4
  )
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
# Convert the genlight object to genind.
# NOTE: this reuses the name albo1, replacing the Set 2 object loaded earlier.
albo1 <- gl2gi(x = albo, probar = TRUE, verbose = NULL)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save the genind object

# Write the genind object (albo1) to disk so it can be reloaded later
# without repeating the conversion step.
saveRDS(
  object = albo1,
  file = here(
    "scripts", "RMarkdowns", "output", "europe", "dapc", "albo1_MAF01.rds"
  )
)

Load the genind object

# Restore the stored genind object and print its summary.
albo1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "europe", "dapc", "albo1_MAF01.rds"
  )
)

albo1
## /// GENIND OBJECT /////////
## 
##  // 408 individuals; 20,968 loci; 41,936 alleles; size: 77.1 Mb
## 
##  // Basic content
##    @tab:  408 x 41936 matrix of allele counts
##    @loc.n.all: number of alleles per locus (range: 2-2)
##    @loc.fac: locus factor for the 41936 columns of @tab
##    @all.names: list of allele names for each locus
##    @ploidy: ploidy of each individual  (range: 2-2)
##    @type:  codom
##    @call: df2genind(X = xx[, ], sep = "/", ncode = 1, ind.names = x@ind.names, 
##     pop = x@pop, NA.char = "-", ploidy = 2)
## 
##  // Optional content
##    @pop: population of each individual (group size range: 4-16)
##    @other: a list containing: sex  phenotype  pat  mat

4. DAPC for Europe for SNP Set 3

Find clusters

# find.clusters() is run interactively here; the answers given at its two
# prompts in the recorded run are noted below.
grp <- find.clusters(x = albo1, max.n.clust = 40)
# Number of PCs retained: 400
# Choose the number of clusters (>=2): 3

Save it

# Store the find.clusters() result so the interactive step need not be redone.
saveRDS(
  object = grp,
  file = here("scripts/RMarkdowns/output/europe/dapc/grp_MAF01.rds")
)

To load it

# Reload the stored clustering result and list the components it contains.
grp <- readRDS(
  file = here("scripts/RMarkdowns/output/europe/dapc/grp_MAF01.rds")
)
names(grp)
## [1] "Kstat" "stat"  "grp"   "size"
# Number of individuals assigned to each of the 3 inferred clusters
grp[["size"]]
## [1]  68 106 234
# Cross-tabulate sampling population (rows) against inferred cluster (columns)
table(pop(albo1), grp[["grp"]])
##      
##        1  2  3
##   ALD  0  0 10
##   ALU  0 12  0
##   ALV  0  0 12
##   ARM  0 10  0
##   BAR  0  0 12
##   BRE 13  0  0
##   BUL  0  0 10
##   CES 14  0  0
##   CRO  0  0 12
##   DES 16  0  0
##   FRS  0  0 12
##   GES  0 12  0
##   GRA  0  0 11
##   GRC  0  0 10
##   IMP  0  0  4
##   ITB  2  0  3
##   ITP  0  0  8
##   ITR  0  0 12
##   KER  0 12  0
##   KRA  0 12  0
##   MAL  0  0 12
##   POP  2  0 10
##   RAR  0 12  0
##   ROM  0  0  4
##   ROS  0  0 11
##   SER  0  0  4
##   SEV  0 12  0
##   SIC  0  0  9
##   SLO  0  0 12
##   SOC  0 12  0
##   SPB  8  0  0
##   SPC  6  0  0
##   SPM  4  0  2
##   SPS  0  0  8
##   STS  0  0 12
##   TIK  0 12  0
##   TIR  0  0  4
##   TRE  3  0  9
##   TUA  0  0  9
##   TUH  0  0 12
# Run DAPC with the inferred clusters as the grouping factor.
# n.pca and n.da are given explicitly (400 PCs and 2 discriminant functions,
# the values reported when dapc1 is printed) so the call does not stop at the
# interactive prompts and the document can be rendered unattended.
dapc1 <- dapc(albo1, grp$grp, n.pca = 400, n.da = 2)

Save it

# Store the fitted DAPC object for later reuse.
saveRDS(
  object = dapc1,
  file = here("scripts/RMarkdowns/output/europe/dapc/dapc_MAF01.rds")
)

To load it

# Reload the stored DAPC object and print its summary.
dapc1 <- readRDS(
  file = here("scripts/RMarkdowns/output/europe/dapc/dapc_MAF01.rds")
)
dapc1
##  #################################################
##  # Discriminant Analysis of Principal Components #
##  #################################################
## class: dapc
## $call: dapc.genind(x = albo1, pop = grp$grp)
## 
## $n.pca: 400 first PCs of PCA used
## $n.da: 2 discriminant functions saved
## $var (proportion of conserved variance): 0.998
## 
## $eig (eigenvalues): 59260000 2313000  vector    length content                   
## 1 $eig      2      eigenvalues               
## 2 $grp      408    prior group assignment    
## 3 $prior    3      prior group probabilities 
## 4 $assign   408    posterior group assignment
## 5 $pca.cent 41936  centring vector of PCA    
## 6 $pca.norm 41936  scaling vector of PCA     
## 7 $pca.eig  407    eigenvalues of PCA        
## 
##   data.frame    nrow  ncol content                                          
## 1 $tab          408   400  retained PCs of PCA                              
## 2 $means        3     400  group means                                      
## 3 $loadings     400   2    loadings of variables                            
## 4 $ind.coord    408   2    coordinates of individuals (principal components)
## 5 $grp.coord    3     2    coordinates of groups                            
## 6 $posterior    408   3    posterior membership probabilities               
## 7 $pca.loadings 41936 400  PCA loadings of original variables               
## 8 $var.contr    41936 2    contribution of original variables
# Default scatter plot of the first DAPC fit
scatter(x = dapc1)

Calculating the optimum PC number to rerun DAPC

# Calculate the optimal number of PCs to retain, based on the a-score.
# seq_len() replaces 1:ncol(...) so the candidate range cannot silently become
# c(1, 0) if dapc1$tab ever has zero columns.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 10
)

## $pop.score
## $pop.score$`1`
##         1         2         3 
## 0.6176471 0.9339623 0.0000000 
## 
## $pop.score$`50`
##         1         2         3 
## 0.7558824 0.7047170 0.1123932 
## 
## $pop.score$`100`
##         1         2         3 
## 0.5632353 0.5075472 0.1576923 
## 
## $pop.score$`150`
##         1         2         3 
## 0.3750000 0.3867925 0.1435897 
## 
## $pop.score$`200`
##          1          2          3 
## 0.22794118 0.25943396 0.09957265 
## 
## $pop.score$`250`
##          1          2          3 
## 0.15000000 0.15000000 0.06111111 
## 
## $pop.score$`300`
##          1          2          3 
## 0.12500000 0.09245283 0.04572650 
## 
## $pop.score$`350`
##          1          2          3 
## 0.05147059 0.05000000 0.02393162 
## 
## $pop.score$`400`
##           1           2           3 
## 0.002941176 0.006603774 0.002991453 
## 
## 
## $mean
##           1          50         100         150         200         250 
## 0.517203108 0.524330832 0.409491591 0.301794065 0.195649263 0.120370370 
##         300         350         400 
## 0.087726442 0.041800737 0.004178801 
## 
## $pred
## $pred$x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400
## 
## $pred$y
##   [1] 0.517203108 0.518005371 0.518806173 0.519603859 0.520396778 0.521183274
##   [7] 0.521961695 0.522730386 0.523487695 0.524231968 0.524961551 0.525674791
##  [13] 0.526370034 0.527045628 0.527699917 0.528331249 0.528937970 0.529518427
##  [19] 0.530070967 0.530593935 0.531085678 0.531544542 0.531968875 0.532357022
##  [25] 0.532707331 0.533018146 0.533287816 0.533514687 0.533697104 0.533833415
##  [31] 0.533921966 0.533961103 0.533949173 0.533884522 0.533765497 0.533590445
##  [37] 0.533357711 0.533065643 0.532712586 0.532296888 0.531816894 0.531270952
##  [43] 0.530657407 0.529974606 0.529220896 0.528394623 0.527494134 0.526517775
##  [49] 0.525463892 0.524330832 0.523117576 0.521825643 0.520457186 0.519014358
##  [55] 0.517499313 0.515914203 0.514261182 0.512542403 0.510760019 0.508916184
##  [61] 0.507013051 0.505052772 0.503037502 0.500969393 0.498850599 0.496683273
##  [67] 0.494469568 0.492211637 0.489911634 0.487571711 0.485194023 0.482780721
##  [73] 0.480333960 0.477855893 0.475348673 0.472814453 0.470255386 0.467673626
##  [79] 0.465071325 0.462450638 0.459813717 0.457162715 0.454499786 0.451827083
##  [85] 0.449146759 0.446460968 0.443771862 0.441081594 0.438392319 0.435706189
##  [91] 0.433025358 0.430351979 0.427688204 0.425036187 0.422398082 0.419776042
##  [97] 0.417172219 0.414588768 0.412027840 0.409491591 0.406981692 0.404497901
## [103] 0.402039493 0.399605744 0.397195932 0.394809333 0.392445222 0.390102876
## [109] 0.387781571 0.385480585 0.383199192 0.380936670 0.378692295 0.376465343
## [115] 0.374255091 0.372060814 0.369881789 0.367717293 0.365566602 0.363428992
## [121] 0.361303739 0.359190120 0.357087412 0.354994890 0.352911830 0.350837510
## [127] 0.348771206 0.346712193 0.344659749 0.342613149 0.340571670 0.338534588
## [133] 0.336501180 0.334470721 0.332442489 0.330415760 0.328389809 0.326363914
## [139] 0.324337350 0.322309394 0.320279322 0.318246411 0.316209937 0.314169177
## [145] 0.312123405 0.310071900 0.308013937 0.305948793 0.303875744 0.301794065
## [151] 0.299703235 0.297603524 0.295495408 0.293379359 0.291255850 0.289125355
## [157] 0.286988346 0.284845297 0.282696680 0.280542970 0.278384639 0.276222160
## [163] 0.274056007 0.271886652 0.269714569 0.267540231 0.265364111 0.263186682
## [169] 0.261008417 0.258829790 0.256651274 0.254473342 0.252296466 0.250121121
## [175] 0.247947779 0.245776913 0.243608997 0.241444504 0.239283906 0.237127678
## [181] 0.234976292 0.232830221 0.230689939 0.228555918 0.226428632 0.224308554
## [187] 0.222196157 0.220091915 0.217996300 0.215909785 0.213832844 0.211765950
## [193] 0.209709576 0.207664195 0.205630281 0.203608305 0.201598743 0.199602066
## [199] 0.197618748 0.195649263 0.193694043 0.191753367 0.189827472 0.187916596
## [205] 0.186020978 0.184140856 0.182276468 0.180428051 0.178595845 0.176780086
## [211] 0.174981013 0.173198865 0.171433879 0.169686293 0.167956346 0.166244275
## [217] 0.164550319 0.162874716 0.161217703 0.159579520 0.157960403 0.156360592
## [223] 0.154780323 0.153219836 0.151679368 0.150159158 0.148659443 0.147180462
## [229] 0.145722453 0.144285653 0.142870301 0.141476635 0.140104894 0.138755314
## [235] 0.137428134 0.136123593 0.134841929 0.133583378 0.132348181 0.131136574
## [241] 0.129948796 0.128785084 0.127645678 0.126530814 0.125440732 0.124375669
## [247] 0.123335863 0.122321553 0.121332976 0.120370370 0.119433791 0.118522562
## [253] 0.117635820 0.116772707 0.115932361 0.115113921 0.114316528 0.113539320
## [259] 0.112781437 0.112042018 0.111320203 0.110615131 0.109925942 0.109251775
## [265] 0.108591769 0.107945064 0.107310799 0.106688114 0.106076147 0.105474040
## [271] 0.104880930 0.104295957 0.103718261 0.103146981 0.102581256 0.102020226
## [277] 0.101463031 0.100908809 0.100356701 0.099805844 0.099255380 0.098704447
## [283] 0.098152185 0.097597733 0.097040231 0.096478818 0.095912633 0.095340815
## [289] 0.094762505 0.094176842 0.093582965 0.092980013 0.092367126 0.091743443
## [295] 0.091108104 0.090460247 0.089799014 0.089123542 0.088432972 0.087726442
## [301] 0.087003323 0.086263903 0.085508705 0.084738248 0.083953051 0.083153636
## [307] 0.082340523 0.081514232 0.080675283 0.079824197 0.078961494 0.078087693
## [313] 0.077203316 0.076308883 0.075404914 0.074491928 0.073570448 0.072640991
## [319] 0.071704080 0.070760234 0.069809974 0.068853819 0.067892291 0.066925909
## [325] 0.065955193 0.064980664 0.064002843 0.063022248 0.062039402 0.061054823
## [331] 0.060069033 0.059082551 0.058095898 0.057109594 0.056124159 0.055140114
## [337] 0.054157979 0.053178274 0.052201519 0.051228234 0.050258941 0.049294159
## [343] 0.048334408 0.047380209 0.046432083 0.045490548 0.044556126 0.043629336
## [349] 0.042710700 0.041800737 0.040899851 0.040007973 0.039124921 0.038250509
## [355] 0.037384552 0.036526868 0.035677271 0.034835577 0.034001601 0.033175160
## [361] 0.032356069 0.031544142 0.030739197 0.029941049 0.029149512 0.028364404
## [367] 0.027585539 0.026812733 0.026045802 0.025284561 0.024528826 0.023778413
## [373] 0.023033137 0.022292814 0.021557259 0.020826288 0.020099717 0.019377361
## [379] 0.018659036 0.017944558 0.017233741 0.016526402 0.015822357 0.015121421
## [385] 0.014423409 0.013728137 0.013035422 0.012345077 0.011656920 0.010970765
## [391] 0.010286429 0.009603726 0.008922473 0.008242486 0.007563579 0.006885568
## [397] 0.006208269 0.005531498 0.004855070 0.004178801
## 
## 
## $best
## [1] 32
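#Optimal number = 33 (note: the run printed above reports $best = 32; optim.a.score relies on random simulations, so the optimum can shift slightly between runs)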

Rerun DAPC with optimum PCs (33)

# Colour palette for the population-level DAPC scatter plot.
# NOTE(review): 38 colours are defined while the population-by-cluster table
# lists 40 populations, so colours are presumably recycled for the last two
# groups -- confirm, and extend the palette if distinct colours are required.
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#FFED7F",
  "#6a8fe0", "#FDCDAC", "#8c61cd", "#f365e7", "#871550", "#f6c8de",
  "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FFFF33", "#FF7F00",
  "#2524f9", "#cddb9b", "#799d10", "#CCCCCC", "#B3E2CD", "#a7e831",
  "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b",
  "#ead624", "#6A3D9A", "#FDC086", "#21a708", "#332288", "#51f310",
  "#9d8d88", "#66C2A5"
)

# Re-run DAPC with the number of retained PCs set to the chosen optimum (33)
# and 9 discriminant functions. No grouping vector is supplied here.
# NOTE(review): presumably the populations stored in albo1 are used as groups
# -- confirm.
dapc2 <- dapc(albo1, var.contrib = TRUE, scale = FALSE, n.pca = 33, n.da = 9)

# Shrink text for this plot only; par(op) afterwards restores the previous
# graphics settings so the change does not leak into later plots.
op <- par(cex = 0.65)
scatter(
  dapc2,
  pch = 20, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomright", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "bottomleft"
)
par(op)

# Express each eigenvalue in dapc2$eig as a percentage of their sum,
# rounded to two decimals.
eig_percent <- round(prop.table(dapc2$eig) * 100, digits = 2)
eig_percent
##  [1] 34.21 19.62 11.20  6.77  4.96  4.59  3.56  3.08  2.30  1.88  1.49  1.34
## [13]  0.95  0.80  0.74  0.45  0.38  0.33  0.30  0.25  0.16  0.13  0.13  0.10
## [25]  0.08  0.06  0.05  0.04  0.03  0.02  0.01  0.00  0.00

Changing symbols for localities

# Plotting symbols 1 to 25, passed as pch so that groups differ by symbol as
# well as by colour.
good.shapes <- 1:25

# Shrink text for this plot only; par(op) afterwards restores the previous
# graphics settings so the change does not leak into later plots.
op <- par(cex = 0.65)
scatter(
  dapc2,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomright", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "bottomleft"
)
par(op)

Global dataset

1. Create files (for SNP Set 3)

# Work from the euro_global project directory.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
# Write the output/snps_sets/r2_0.1_b fileset out again as a binary fileset
# under output/dapc/dapc_01b. PLINK runs with --silent, so the counts are
# read back from the log file afterwards.
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.1_b \
--make-bed \
--out output/dapc/dapc_01b \
--silent;
# Show the log lines that mention samples, variants or remaining records.
grep 'samples\|variants\|remaining' output/dapc/dapc_01b.log

66317 variants loaded from .bim file. 66317 variants and 688 people pass filters and QC.

Convert to raw format

# Export the output/dapc/dapc_01b fileset in .raw format (--recodeA); the
# resulting dapc_01b.raw is the file read by read.PLINK() in the R step below.
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/dapc_01b \
--recodeA \
--out output/dapc/dapc_01b \
--silent;
# Show the log lines that mention samples, variants or remaining records.
grep 'samples\|variants\|remaining' output/dapc/dapc_01b.log

66317 variants loaded from .bim file. 66317 variants and 688 people pass filters and QC.

Import the data and convert it to genind format

# Import the PLINK .raw file.
# requireNamespace() replaces require() as the availability test for the
# 'parallel' package: it yields TRUE/FALSE without attaching the package to
# the search path as a side effect.
snp <-
  read.PLINK(
    here("euro_global/output/dapc/dapc_01b.raw"),
    quiet = FALSE,
    chunkSize = 1000,
    parallel = requireNamespace("parallel", quietly = TRUE),
    n.cores = 4
  )

# Quick checks on the imported object: individuals, loci, populations, IDs
nInd(snp)
nLoc(snp)
nPop(snp)
indNames(snp)

# Convert to genind (gl2gi), showing a progress bar
snp2 <- gl2gi(snp, probar = TRUE, verbose = NULL)

Save it

# Store the converted genind object so the import/conversion need not be rerun.
saveRDS(
  object = snp2,
  file = here("euro_global/output/dapc/MAF_1/snp2.rds")
)

To load it

# Reload the stored genind object.
snp2 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp2.rds")
)

2. Get Sample Locations

Import sample data

# Load the sampling-locality metadata table and preview its first rows.
sampling_loc <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"
  )
)
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
# Fill the strata of snp2 from the data stored in its "other" component.
strata(snp2) <- data.frame(other(snp2))

# At this point the population factor still holds the sampling-locality
# abbreviations (e.g. OKI), not countries.
head(pop(snp2)) 
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN

Load the csv

# Read the lookup table that gives a country for each row (columns: pop,
# country).
# The file holds a non-UTF-8 byte (0xA0, the Latin-1 non-breaking space) inside
# "Sri Lanka", which otherwise surfaces downstream as the factor level
# "Sri\xa0Lanka". Declare the encoding on read and turn the non-breaking space
# into a regular space so the label prints and matches correctly.
countr <- read.csv(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "DAPC_countries.csv"
  ),
  fileEncoding = "latin1"
)
countr$country <- gsub("\u00a0", " ", countr$country, fixed = TRUE)
df <- as.data.frame(countr)

head(df)
##   pop country
## 1 OKI   Japan
## 2 OKI   Japan
## 3 OKI   Japan
## 4 OKI   Japan
## 5 OKI   Japan
## 6 OKI   Japan
# Replace the population factor of snp2 with the country of each individual.
# Writing to the @pop slot directly does not validate the length of the new
# factor, so fail early if the lookup table does not have exactly one row per
# individual. The slot is still written directly (rather than via pop<-) to
# keep the factor levels produced by as.factor().
stopifnot(nrow(df) == nInd(snp2))
snp2@pop <- as.factor(df$country)
pop(snp2)
##   [1] Japan        Japan        Japan        Japan        Japan       
##   [6] Japan        Japan        Japan        Japan        Japan       
##  [11] Japan        Japan        China        China        China       
##  [16] China        China        China        China        China       
##  [21] China        China        China        China        Russia      
##  [26] Russia       Russia       Russia       Russia       Russia      
##  [31] Russia       Russia       Russia       Russia       Russia      
##  [36] Russia       Ukraine      Ukraine      Ukraine      Ukraine     
##  [41] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
##  [46] Ukraine      Ukraine      Ukraine      China        China       
##  [51] China        China        China        China        China       
##  [56] China        China        Nepal        Nepal        Nepal       
##  [61] Nepal        Georgia      Georgia      Georgia      Georgia     
##  [66] Georgia      Georgia      Georgia      Georgia      Georgia     
##  [71] Georgia      Georgia      Georgia      Ukraine      Ukraine     
##  [76] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
##  [81] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
##  [86] Russia       Russia       Russia       Russia       Russia      
##  [91] Russia       Russia       Russia       Russia       Russia      
##  [96] Russia       Russia       Russia       Russia       Russia      
## [101] Russia       Russia       Russia       Russia       Russia      
## [106] Russia       Russia       Russia       Russia       Russia      
## [111] Russia       Russia       Russia       Taiwan       Taiwan      
## [116] Taiwan       Taiwan       Taiwan       Taiwan       Taiwan      
## [121] China        China        China        China        China       
## [126] China        China        China        China        China       
## [131] China        China        Vietnam      Vietnam      Vietnam     
## [136] Vietnam      Italy        Italy        Italy        Italy       
## [141] Italy        Italy        Italy        Italy        Italy       
## [146] Italy        Italy        Italy        Ukraine      Ukraine     
## [151] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
## [156] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
## [161] Malaysia     Malaysia     Malaysia     Malaysia     France      
## [166] France       France       France       France       France      
## [171] France       France       Italy        Italy        Italy       
## [176] Italy        Italy        Italy        Italy        Italy       
## [181] Italy        Vietnam      Vietnam      Vietnam      Vietnam     
## [186] Vietnam      Vietnam      Vietnam      Vietnam      Vietnam     
## [191] Vietnam      Vietnam      Vietnam      Vietnam      Vietnam     
## [196] Vietnam      Vietnam      Vietnam      Vietnam      Thailand    
## [201] Thailand     Thailand     Thailand     Thailand     Thailand    
## [206] Thailand     Thailand     Thailand     Thailand     Thailand    
## [211] Thailand     Thailand     Thailand     Thailand     Thailand    
## [216] Thailand     Thailand     Thailand     Thailand     Thailand    
## [221] Thailand     Italy        Italy        Italy        Italy       
## [226] Italy        Italy        Italy        Japan        Japan       
## [231] Japan        Japan        Japan        Japan        Japan       
## [236] Japan        Japan        Japan        Japan        Japan       
## [241] Japan        Japan        Japan        Japan        Japan       
## [246] Japan        Japan        Japan        Japan        Japan       
## [251] Japan        France       France       France       France      
## [256] Italy        Italy        Italy        Italy        Italy       
## [261] Italy        Italy        Italy        Italy        Italy       
## [266] Italy        Italy        Italy        Italy        Italy       
## [271] Italy        Italy        Italy        Italy        Italy       
## [276] Italy        Italy        Italy        Italy        Russia      
## [281] Russia       Russia       Russia       Russia       Russia      
## [286] Russia       Russia       Brazil       Brazil       Brazil      
## [291] Brazil       Brazil       Brazil       Brazil       Brazil      
## [296] Brazil       Brazil       Brazil       Albania      Albania     
## [301] Albania      Albania      Italy        Italy        Italy       
## [306] Italy        Italy        Italy        Italy        Italy       
## [311] Cambodia     Cambodia     Cambodia     Cambodia     Cambodia    
## [316] Cambodia     Japan        Cambodia     Cambodia     Cambodia    
## [321] Cambodia     Cambodia     Cambodia     Italy        Italy       
## [326] Italy        Japan        Italy        Italy        Italy       
## [331] Italy        Italy        Japan        Greece       Japan       
## [336] Italy        Italy        Italy        Italy        Japan       
## [341] Japan        Japan        Japan        Japan        Japan       
## [346] Japan        Japan        India        India        India       
## [351] India        India        India        India        India       
## [356] India        India        India        India        Thailand    
## [361] Thailand     Thailand     Thailand     Thailand     Thailand    
## [366] Thailand     Thailand     Thailand     Spain        Spain       
## [371] Spain        Spain        Spain        Spain        Spain       
## [376] Spain        Spain        Spain        Spain        Spain       
## [381] USA          USA          USA          USA          USA         
## [386] USA          USA          USA          USA          USA         
## [391] USA          USA          USA          USA          USA         
## [396] USA          USA          USA          USA          USA         
## [401] USA          USA          USA          Bhutan       Bhutan      
## [406] Nepal        Nepal        Sri\xa0Lanka Sri\xa0Lanka Thailand    
## [411] Thailand     Thailand     Thailand     Thailand     Thailand    
## [416] Thailand     Thailand     Thailand     Thailand     Thailand    
## [421] Indonesia    Indonesia    Indonesia    Indonesia    Indonesia   
## [426] Indonesia    Indonesia    Indonesia    Indonesia    Indonesia   
## [431] Indonesia    Indonesia    Maldives     Maldives     Maldives    
## [436] Maldives     Brazil       Brazil       Brazil       Brazil      
## [441] Brazil       Brazil       Brazil       Brazil       Brazil      
## [446] Brazil       Brazil       Brazil       Bulgaria     Bulgaria    
## [451] Bulgaria     Bulgaria     Bulgaria     Bulgaria     Bulgaria    
## [456] Bulgaria     Bulgaria     Bulgaria     Croatia      Croatia     
## [461] Croatia      Croatia      Croatia      Croatia      Croatia     
## [466] Croatia      Croatia      Croatia      Croatia      Croatia     
## [471] Greece       Greece       Greece       Greece       Greece      
## [476] Greece       Greece       Greece       Greece       Greece      
## [481] Greece       Greece       Greece       Greece       Greece      
## [486] Greece       Greece       Greece       Greece       Greece      
## [491] Italy        Italy        Italy        Italy        Italy       
## [496] Malta        Malta        Malta        Malta        Malta       
## [501] Malta        Malta        Malta        Malta        Malta       
## [506] Malta        Malta        Spain        Spain        Spain       
## [511] Spain        Spain        Turkey       Turkey       Turkey      
## [516] Turkey       Turkey       Turkey       Turkey       Turkey      
## [521] Turkey       Turkey       Turkey       Turkey       Turkey      
## [526] Turkey       Turkey       Turkey       Turkey       Turkey      
## [531] Turkey       Turkey       Turkey       Albania      Albania     
## [536] Albania      Albania      Albania      Albania      Albania     
## [541] Albania      Albania      Albania      France       France      
## [546] France       France       France       France       France      
## [551] France       France       France       France       France      
## [556] Italy        Italy        Italy        Italy        Italy       
## [561] Italy        Italy        Italy        Italy        Portugal    
## [566] Portugal     Portugal     Portugal     Portugal     Portugal    
## [571] Portugal     Portugal     Portugal     Portugal     Portugal    
## [576] Portugal     Portugal     Portugal     Romania      Romania     
## [581] Romania      Romania      Romania      Romania      Romania     
## [586] Romania      Romania      Romania      Romania      Serbia      
## [591] Serbia       Serbia       Serbia       Slovenia     Slovenia    
## [596] Slovenia     Slovenia     Slovenia     Slovenia     Slovenia    
## [601] Slovenia     Slovenia     Slovenia     Slovenia     Slovenia    
## [606] Spain        Spain        Spain        Spain        Spain       
## [611] Spain        Spain        Spain        Spain        Spain       
## [616] Spain        Spain        Spain        Spain        Spain       
## [621] Spain        Spain        Spain        Armenia      Armenia     
## [626] Armenia      Armenia      Armenia      Armenia      Armenia     
## [631] Armenia      Armenia      Armenia      Spain        Spain       
## [636] Spain        Spain        Albania      Albania      Albania     
## [641] Albania      Albania      Albania      Albania      Albania     
## [646] Albania      Albania      Albania      Albania      Italy       
## [651] Italy        Italy        Italy        Italy        Italy       
## [656] Italy        Italy        Italy        Italy        Italy       
## [661] Italy        Indonesia    Indonesia    Indonesia    Indonesia   
## [666] Indonesia    Indonesia    Indonesia    Indonesia    Indonesia   
## [671] Indonesia    Indonesia    Indonesia    Indonesia    Indonesia   
## [676] Indonesia    Malaysia     Malaysia     Malaysia     Malaysia    
## [681] Malaysia     Malaysia     Malaysia     Malaysia     Malaysia    
## [686] Malaysia     Malaysia     Malaysia    
## 32 Levels: Albania Armenia Bhutan Brazil Bulgaria Cambodia China ... Vietnam

Save the genind object

# Store the genind object, now labelled by country, for later reuse.
saveRDS(
  object = snp2,
  file = here("scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "snp_country.rds"
  )
)

Load the genind object

# Reload the country-labelled genind object.
# It is restored under the name snp2 because the following steps
# (scaleGen(snp2, ...), snp2$pop, table(pop(snp2), ...)) all read snp2.
# Loading it as snp_country had no effect: that name is overwritten by the
# scaled matrix in the next step, so a session resumed from this file was left
# without the country-labelled snp2.
snp2 <- readRDS(here("scripts", "RMarkdowns",
  "output", "euro_global", "dapc", "MAF_1", "snp_country.rds"
))

3. Scale and find clusters

Scale

# Scale the genind data with scaleGen(), handling missing values via
# NA.method = "mean", then check the class of the result.
snp_country <- scaleGen(x = snp2, NA.method = "mean")
class(snp_country)
## [1] "matrix" "array"
# Dimensions (rows, columns) of the scaled matrix
dim(snp_country)
## [1]    688 132634
# Show the top-left 5 x 5 corner of the scaled matrix
snp_country[seq_len(5), seq_len(5)]
##      AX-583033342_C.C AX-583033342_C.G AX-583035163_A.A AX-583035163_A.T
## 1001        0.3976471       -0.3976471        0.5372938       -0.5372938
## 1002        0.3976471       -0.3976471        0.5372938       -0.5372938
## 1003        0.3976471       -0.3976471        0.5372938       -0.5372938
## 1004        0.3976471       -0.3976471        0.5372938       -0.5372938
## 1005        0.3976471       -0.3976471        0.5372938       -0.5372938
##      AX-583033370_G.T
## 1001        1.0219800
## 1002        0.0000000
## 1003       -0.2722724
## 1004       -0.2722724
## 1005       -0.2722724
# Keep a copy of the population factor stored in the genind object (snp2)
populations <- pop(snp2)

3.1 Find clusters

# find.clusters() is run interactively on the scaled matrix; the answers given
# at its two prompts in the recorded run are noted below.
grp <- find.clusters(x = snp_country, max.n.clust = 25)
# Number of PCs retained: 600
# Choose the number of clusters (>=2): 6

Save the find.clusters result

# Write the find.clusters() result (grp) to grp.rds.
saveRDS(
  object = grp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "grp.rds"
  )
)

Load the find.clusters result

# Read the saved find.clusters() result and list its components.
grp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "grp.rds"
  )
)
names(grp)
## [1] "Kstat" "stat"  "grp"   "size"
grp$size #group sizes for our clusters
## [1] 134  59 144 106 216  29
table(pop(snp2), grp$grp)
##               
##                 1  2  3  4  5  6
##   Albania       0 26  0  0  0  0
##   Armenia       0  0  0 10  0  0
##   Bhutan        2  0  0  0  0  0
##   Brazil       12  0 11  0  0  0
##   Bulgaria      0  0  0  0 10  0
##   Cambodia     12  0  0  0  0  0
##   China        13  0  0  0 20  0
##   Croatia       0 12  0  0  0  0
##   France        0  0  0  0 24  0
##   Georgia       0  0  0 12  0  0
##   Greece        0 21  0  0  0  0
##   India        12  0  0  0  0  0
##   Indonesia     0  0  0  0  0 27
##   Italy         0  0 49  0 49  0
##   Japan         0  0 35  0 12  0
##   Malaysia     16  0  0  0  0  0
##   Maldives      4  0  0  0  0  0
##   Malta         0  0  0  0 12  0
##   Nepal         4  0  0  0  0  2
##   Portugal      0  0  9  0  5  0
##   Romania       0  0  0  0 11  0
##   Russia        0  0  0 48  0  0
##   Serbia        0  0  0  0  4  0
##   Slovenia      0  0  0  0 12  0
##   Spain         0  0 17  0 22  0
##   Sri\xa0Lanka  2  0  0  0  0  0
##   Taiwan        0  0  0  0  7  0
##   Thailand     42  0  0  0  0  0
##   Turkey        0  0  0  0 21  0
##   Ukraine       0  0  0 36  0  0
##   USA           0  0 23  0  0  0
##   Vietnam      15  0  0  0  7  0
# Graphical view of the country-by-cluster contingency table.
table.value(
  table(pop(snp2), grp$grp),
  col.lab = paste("inf", 1:6),  # inferred groups (the 6 clusters)
  row.lab = paste("ori", 1:32)  # original groups (the 32 countries)
)

# Interactive DAPC on the scaled matrix, grouped by the inferred clusters.
# Answers given at the prompts:
#   PCs retained:                     100
#   discriminant functions retained:  5
dapc1 <- dapc(
  snp_country,
  grp$grp
)

Save the DAPC object

# Write the cluster-based DAPC (dapc1) to dapc1.rds.
saveRDS(
  object = dapc1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc1.rds"
  )
)

Load the DAPC object

# Read the saved cluster-based DAPC back from dapc1.rds.
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc1.rds"
  )
)
dapc1
##  #################################################
##  # Discriminant Analysis of Principal Components #
##  #################################################
## class: dapc
## $call: dapc.data.frame(x = as.data.frame(x), grp = ..1)
## 
## $n.pca: 100 first PCs of PCA used
## $n.da: 5 discriminant functions saved
## $var (proportion of conserved variance): 0.399
## 
## $eig (eigenvalues): 12760 6391 3147 2836 1369  vector    length content                   
## 1 $eig      5      eigenvalues               
## 2 $grp      688    prior group assignment    
## 3 $prior    6      prior group probabilities 
## 4 $assign   688    posterior group assignment
## 5 $pca.cent 132634 centring vector of PCA    
## 6 $pca.norm 132634 scaling vector of PCA     
## 7 $pca.eig  687    eigenvalues of PCA        
## 
##   data.frame    nrow   ncol content                                          
## 1 $tab          688    100  retained PCs of PCA                              
## 2 $means        6      100  group means                                      
## 3 $loadings     100    5    loadings of variables                            
## 4 $ind.coord    688    5    coordinates of individuals (principal components)
## 5 $grp.coord    6      5    coordinates of groups                            
## 6 $posterior    688    6    posterior membership probabilities               
## 7 $pca.loadings 132634 100  PCA loadings of original variables               
## 8 $var.contr    132634 5    contribution of original variables

3.2 Calculating the optimum PC number to rerun DAPC

# a-score optimisation: try every number of retained PCs available in dapc1
# (the columns of dapc1$tab) and plot the result.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 10
)
# Optimal number of PCs reported for this run: 13

3.3. Now do cross-validation

Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and then used to predict the groups of the remaining 10% of observations. The current method uses the average prediction success per group (result = "groupMean") or the overall prediction success (result = "overall"). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.

# Cross-validate the number of retained PCs (up to 200) with countries as the
# groups: 90% training set, 30 replicates, results plotted.
xvalDapc(
  snp_country,
  populations,
  n.pca.max = 200,
  n.da = NULL,
  training.set = 0.9,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  n.pca = NULL,
  n.rep = 30,
  xval.plot = TRUE
)

Number of PCs Achieving Highest Mean Success: “140”. Number of PCs Achieving Lowest MSE: “140”. n.pca: 140 first PCs of PCA used; n.da: 31 discriminant functions saved; var (proportion of conserved variance): 0.469.

So the cross-validation gives us a very different number of PCs to retain (140) compared to the a-score (13).

Run DAPC with object

# DAPC grouped by country, using the a-score optimum of 13 PCs and keeping
# 5 discriminant functions.
dapc_snp1 <- dapc(
  snp_country,
  grp = populations,
  n.pca = 13,
  n.da = 5
)

$n.pca: 13 first PCs of PCA used $n.da: 5 discriminant functions saved $var (proportion of conserved variance): 0.156

Save it

# Write the 13-PC country DAPC (dapc_snp1) to dapc_snp1.rds.
saveRDS(
  object = dapc_snp1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_snp1.rds"
  )
)

To load it

# Read the 13-PC country DAPC back from dapc_snp1.rds.
dapc_snp1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_snp1.rds"
  )
)

Most contributing alleles

# Plot the allele contributions of dapc_snp1 on axis 2 and keep the alleles
# above the threshold in `contrib`.
# The seed is fixed before the call (presumably because lab.jitter randomises
# the label positions) so the figure can be reproduced.
set.seed(4)
contrib <- loadingplot(
  dapc_snp1$var.contr,
  axis = 2,
  threshold = 0.0002,  # spelled out; `thres` relied on partial matching
  lab.jitter = 3
)

Run DAPC with object using #pcs from cross-validation

# DAPC grouped by country with the cross-validated settings:
# 140 PCs and 31 discriminant functions. Print the summary afterwards.
dapc_snp2 <- dapc(
  snp_country,
  grp = populations,
  n.pca = 140,
  n.da = 31
)
dapc_snp2

$n.pca: 140 first PCs of PCA used $n.da: 31 discriminant functions saved $var (proportion of conserved variance): 0.469

Save it

# Write the 140-PC country DAPC (dapc_snp2) to dapc_snp2.rds.
saveRDS(
  object = dapc_snp2,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_snp2.rds"
  )
)

To load it

# Read the 140-PC country DAPC back from dapc_snp2.rds.
dapc_snp2 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_snp2.rds"
  )
)

# Clean the group labels before plotting: transliterate to ASCII, then drop
# every character that is not alphanumeric or whitespace.
# Note: `grp` is reused here and no longer holds the find.clusters() result.
grp <- dapc_snp2$grp
levels(grp) <- gsub(
  "[^[:alnum:][:space:]]", "",
  iconv(levels(grp), to = "ASCII//TRANSLIT")
)
grp <- factor(grp)
dapc_snp2$grp <- grp

# Earlier attempts at the label fix, kept for reference:
#dapc_snp2$grp <- iconv(dapc_snp2$grp, to = "ASCII//TRANSLIT")
#dapc_snp2$grp <- gsub("[^[:alnum:][:space:]]", "", dapc_snp2$grp)
#Sys.setlocale("LC_ALL", "C")
#dapc_snp2 <- dapc_snp2$label <- iconv(dapc_snp2$label, to = "ASCII//TRANSLIT")

scatter(dapc_snp2)

3.4 Plot

# Clean the group labels of dapc_snp1 before plotting: transliterate to ASCII,
# then drop every character that is not alphanumeric or whitespace.
grp <- dapc_snp1$grp
levels(grp) <- gsub(
  "[^[:alnum:][:space:]]", "",
  iconv(levels(grp), to = "ASCII//TRANSLIT")
)
grp <- factor(grp)
dapc_snp1$grp <- grp

scatter(dapc_snp1)

Even highest contributors have VERY low loadings though, so no one variant is driving the pattern

# Colour palette for the DAPC scatter plots (39 entries).
myCol2 <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0",
  "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4",
  "#cf749b", "#FF7F00", "#2524f9", "#799d10", "#a7e831", "#984EA3",
  "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624",
  "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5",
  "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3",
  "magenta", "gold4", "purple"
)

Check R symbols for plot

# Preview the plotting symbols used to pick shapes for the DAPC scatter plots:
# a random point cloud in which every factor level gets one of the shapes.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# One factor level per usable shape. With the previous N = 100 most levels had
# no shape (good.shapes[1:100] is mostly NA) and their points were dropped.
N <- length(good.shapes)
M <- 1000

foo <- data.frame(
  x = rnorm(M),
  y = rnorm(M),
  # fix the levels so level i always maps to good.shapes[i]
  s = factor(sample(seq_len(N), M, replace = TRUE), levels = seq_len(N))
)

ggplot(foo, aes(x, y, shape = s)) +
  scale_shape_manual(values = good.shapes) +
  geom_point()

#1:25,28:31,36,55:57

Plot using different discriminant functions

PCs 1 & 2

# Scatter plot of dapc_snp1 on axes 1 and 2: written to a PDF first, then
# drawn again on the active device so it shows up in the rendered document.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# The path is built with here(), like the saveRDS()/readRDS() calls in this
# file, so the output location does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_euro_global_r1_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
op <- par(cex = 0.39)
scatter(
  dapc_snp1,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the graphical parameters changed above
dev.off()

# Same figure on the active device.
op <- par(cex = 0.39)
scatter(
  dapc_snp1,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # do not leave cex = 0.39 set for later plots

3.5 Try dapc with 7 discriminant functions, since we have 6 clusters

These are the plots I kept for results

Run DAPC with object

# DAPC grouped by country with 13 PCs and 7 discriminant functions.
# Print the summary afterwards.
dapc_snp3 <- dapc(
  snp_country,
  grp = populations,
  n.pca = 13,
  n.da = 7
)
dapc_snp3

Save it

# Write the 13-PC / 7-DF country DAPC (dapc_snp3) to dapc_snp3.rds.
saveRDS(
  object = dapc_snp3,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_snp3.rds"
  )
)

To load it

# Read the 13-PC / 7-DF country DAPC back from dapc_snp3.rds.
dapc_snp3 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_snp3.rds"
  )
)

# Clean the group labels BEFORE any plotting so the PDF and the on-screen
# figure share the same legend text (previously the PDF was written first and
# kept the raw labels). Transliterate to ASCII, then drop every character that
# is not alphanumeric or whitespace.
grp <- dapc_snp3$grp
levels(grp) <- gsub(
  "[^[:alnum:][:space:]]", "",
  iconv(levels(grp), to = "ASCII//TRANSLIT")
)
grp <- factor(grp)
dapc_snp3$grp <- grp

good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# Axes 1 and 2 into a PDF. The path is built with here(), like the
# saveRDS()/readRDS() calls in this file.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc3_euro_global_r1_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the graphical parameters changed above
dev.off()

# Same figure on the active device.
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # do not leave cex = 0.39 set for later plots

Try new colors - by region

# Region-based palette: 32 entries, with countries sharing a colour per region.
# NOTE(review): entries presumably follow the group-level order
# (Albania ... Vietnam) — confirm against levels(dapc_snp3$grp).
myCol2 <- c(
  "#a113b2", "goldenrod", "#146c45", "#66C2A5",
  "goldenrod", "#2524f9", "#c41A1C", "#a113b2",
  "#FF7F00", "goldenrod", "#a113b2", "#146c45",
  "#2524f9", "#a113b2", "#c41A1C", "#2524f9",
  "#146c45", "#a113b2", "#146c45", "#a113b2",
  "goldenrod", "goldenrod", "goldenrod", "#a113b2",
  "#a113b2", "#146c45", "#c41A1C", "#2524f9",
  "goldenrod", "goldenrod", "#66C2A5", "#2524f9"
)

# "#a41415"
# Region-coloured scatter of dapc_snp3 on axes 1 and 2: written to a PDF
# first, then drawn again on the active device.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# The path is built with here(), like the saveRDS()/readRDS() calls in this
# file, so the output location does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_euro_global_region_r1_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the graphical parameters changed above
dev.off()

# Same figure on the active device.
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # do not leave cex = 0.39 set for later plots

PCs 1 & 3

# Region-coloured scatter of dapc_snp3 on axes 1 and 3: written to a PDF
# first, then drawn again on the active device.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# The path is built with here(), like the saveRDS()/readRDS() calls in this
# file, so the output location does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_euro_global_region_r1_PC1_3.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the graphical parameters changed above
dev.off()

# Same figure on the active device.
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # do not leave cex = 0.39 set for later plots

PCs 1 & 4

# Region-coloured scatter of dapc_snp3 on axes 1 and 4: written to a PDF
# first, then drawn again on the active device.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# The path is built with here(), like the saveRDS()/readRDS() calls in this
# file, so the output location does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "dapc_euro_global_region_r1_PC1_4.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 4,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the graphical parameters changed above
dev.off()

# Same figure on the active device. The y axis is 4 here as well; the previous
# copy used yax = 3 and so repeated the "1 & 3" figure under this heading.
op <- par(cex = 0.39)
scatter(
  dapc_snp3,
  xax = 1, yax = 4,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # do not leave cex = 0.39 set for later plots

DAPC for subsets of pops

module load PLINK/1.9b_6.21-x86_64

4. DAPC for Italy + US + native range

Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)

Create files

# Subset the r2_0.01_b SNP set to the families listed in
# native_italy_all_and_US.txt and write the result (binary fileset plus VCF)
# under the output/dapc/dapc_italy_all_and_US prefix.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_italy_all_and_US.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--export vcf \
--out output/dapc/dapc_italy_all_and_US \
--silent;
# PLINK runs with --silent, so pull the sample/variant counts from its log.
grep 'samples\|variants\|remaining' output/dapc/dapc_italy_all_and_US.log

22642 variants loaded from .bim file. --keep-fam: 353 people remaining. Total genotyping rate in remaining samples is 0.965364. 22642 variants and 353 people pass filters and QC.

Convert to raw format

# Recode the subset created above (same prefix for input and output) with
# --recodeA; this produces the .raw file that is imported into R afterwards.
# Reusing the prefix also overwrites the earlier .log file.
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/dapc_italy_all_and_US \
--recodeA \
--out output/dapc/dapc_italy_all_and_US \
--silent;
# PLINK runs with --silent, so pull the sample/variant counts from its log.
grep 'samples\|variants\|remaining' output/dapc/dapc_italy_all_and_US.log

22642 variants loaded from .bim file. 22642 variants and 353 people pass filters and QC.

Import the data and convert it to genind format

# Read the PLINK .raw file of the Italy + US + native-range subset into a
# genlight object. Parallel reading on 4 cores is requested only when the
# parallel package can be loaded.
snp <- read.PLINK(
  file = here("euro_global/output/dapc/dapc_italy_all_and_US.raw"),
  chunkSize = 1000,
  n.cores = 4,
  parallel = require("parallel"),
  quiet = FALSE
)
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
nInd(snp)
## [1] 353
nLoc(snp)
## [1] 22642
nPop(snp)
## [1] 40
indNames(snp)
##   [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
##  [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
##  [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
##  [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
##  [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
##  [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
##  [61] "1185" "1186" "1187" "1188" "1189" "1190" "1191" "1192" "1193" "1194"
##  [71] "1195" "1201" "1214" "1215" "1216" "1217" "1226" "1227" "1228" "1229"
##  [81] "1230" "1232" "1233" "1234" "1237" "1238" "1239" "1240" "1241" "1242"
##  [91] "1243" "1244" "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253"
## [101] "1254" "1255" "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263"
## [111] "1264" "1265" "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274"
## [121] "1276" "1282" "1283" "1285" "1286" "1287" "1288" "1289" "1292" "1293"
## [131] "1294" "1295" "1325" "1326" "1328" "1329" "1330" "1331" "1332" "1333"
## [141] "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377" "1378" "1379"
## [151] "1380" "1381" "1382" "1383" "1384" "1430" "1431" "1432" "1433" "1434"
## [161] "1435" "1436" "1437" "1438" "1439" "1440" "1441" "1443" "1444" "1446"
## [171] "1447" "1449" "1451" "1452" "1454" "1456" "1458" "1460" "1461" "197" 
## [181] "198"  "199"  "200"  "201"  "202"  "203"  "204"  "2174" "2175" "2176"
## [191] "2177" "2178" "2179" "217"  "2180" "2181" "2182" "2183" "2184" "2185"
## [201] "2187" "2188" "2189" "218"  "2191" "2192" "2193" "2194" "2195" "219" 
## [211] "220"  "2215" "2216" "2217" "2218" "221"  "222"  "223"  "224"  "225" 
## [221] "226"  "227"  "230"  "255"  "256"  "257"  "258"  "261"  "262"  "263" 
## [231] "264"  "265"  "266"  "267"  "268"  "269"  "270"  "271"  "272"  "273" 
## [241] "275"  "276"  "277"  "278"  "294"  "295"  "296"  "297"  "298"  "299" 
## [251] "301"  "302"  "303"  "304"  "305"  "435"  "436"  "437"  "438"  "439" 
## [261] "440"  "441"  "442"  "443"  "444"  "445"  "446"  "602"  "603"  "604" 
## [271] "607"  "609"  "610"  "623"  "624"  "625"  "626"  "627"  "628"  "629" 
## [281] "630"  "631"  "632"  "633"  "666"  "669"  "670"  "671"  "672"  "673" 
## [291] "674"  "675"  "676"  "677"  "678"  "679"  "680"  "681"  "682"  "683" 
## [301] "747"  "749"  "750"  "751"  "752"  "824"  "825"  "826"  "827"  "829" 
## [311] "830"  "831"  "833"  "834"  "928"  "929"  "930"  "931"  "932"  "933" 
## [321] "934"  "935"  "936"  "937"  "938"  "939"  "964"  "965"  "966"  "967" 
## [331] "972"  "973"  "975"  "976"  "977"  "978"  "979"  "980"  "981"  "982" 
## [341] "983"  "984"  "985"  "986"  "987"  "988"  "989"  "990"  "991"  "992" 
## [351] "993"  "994"  "995"
# Convert the genlight object to a genind object, with the progress bar shown.
snp2 <- gl2gi(
  snp,
  probar = TRUE,
  verbose = NULL
)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==========================================================            |  84%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save it

# Write snp2 to disk so it can be reloaded later without recomputing it.
saveRDS(
  object = snp2,
  file = here("euro_global/output/dapc/MAF_1/snp2_italy_all_and_US.rds")
)

To load it

# Reload the previously saved snp2 object.
snp2 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp2_italy_all_and_US.rds")
)

Scale

# Scale the allele data held in snp2; missing genotypes are handled with the
# "mean" method.
snp3 <- scaleGen(snp2, NA.method = "mean")
# Show what kind of object the scaling step returned.
class(snp3)

I get a warning saying “Some scaling values are null. Corresponding alleles are removed”. This warning appears because one or more alleles are fixed (i.e. show no variation) in this subset of populations, so they cannot be scaled. These alleles were removed.

Save it

# Write the scaled data (snp3) to disk so scaling does not have to be repeated.
saveRDS(
  object = snp3,
  file = here("euro_global/output/dapc/MAF_1/snp3_italy_all_and_US.rds")
)

To load it

# Reload the scaled data and report its dimensions.
snp3 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp3_italy_all_and_US.rds")
)
dim(snp3)
## [1]   353 45280
# Peek at the top-left 5 x 5 corner of the scaled data.
snp3[1:5, 1:5]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001        0.8736826       -0.8736826        0.9129240       -0.9129240
## 1002        0.0000000        0.0000000       -1.4519198        1.4519198
## 1003       -0.4541610        0.4541610       -1.4519198        1.4519198
## 1004       -0.4541610        0.4541610       -1.4519198        1.4519198
## 1005       -0.4541610        0.4541610       -0.2694979        0.2694979
##      AX-583036983_C.T
## 1001       1.21831144
## 1002      -0.07035841
## 1003       1.21831144
## 1004      -0.07035841
## 1005       1.21831144
# Get the populations from the genlight object
# NOTE(review): `snp` is defined outside this section. `populations` is later
# used as the grouping for the rows of snp3, so this presumably assumes both
# objects hold the same individuals in the same order -- confirm.
populations <- snp$pop

4.1 Find clusters

# Look for genetic clusters in the scaled data, allowing up to 310 clusters.
# Answers given when this was run:
#   PCs retained: 350
#   Choose the number of clusters (>=2): 4
grp <- find.clusters(snp3, max.n.clust = 310)

Save it

saveRDS(
  grp, here(
    "euro_global/output/dapc/MAF_1/grp_italy_US.rds"
  )
)

To load it

grp <- readRDS(
  here(
    "euro_global/output/dapc/MAF_1/grp_italy_US.rds"
  )
)

Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and then used to predict the groups of the remaining 10% of observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.

# Cross-validate DAPC on the scaled data, grouped by population, to decide how
# many PCs to keep: at most 200 PCs, 90% training sets, 40 replicates.
xvalDapc(
  snp3,
  populations,
  n.pca.max = 200,
  n.da = NULL,
  training.set = 0.9,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  n.pca = NULL,
  n.rep = 40,
  xval.plot = TRUE
)

Number of PCs Achieving Highest Mean Success: 60. Number of PCs Achieving Lowest MSE: 60. $n.pca: 60 first PCs of PCA used. $n.da: 39 discriminant functions saved. $var (proportion of conserved variance): 0.408.

Run DAPC using these values (60 PCs, 39 discriminant functions)

# Run DAPC with the values selected by cross-validation. n.pca and n.da are
# passed explicitly so the call no longer stops at the interactive prompts and
# can be rerun (or knitted) unattended with the same settings every time:
#   60 PCs retained
#   39 discriminant functions retained
# `populations` is snp$pop, the same grouping that was used for xvalDapc().
dapc1 <- dapc(snp3, populations, n.pca = 60, n.da = 39)

Save the DAPC object

# Write the first DAPC result to disk.
saveRDS(
  object = dapc1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "italy_US", "dapc1.rds"
  )
)

Load the DAPC object

# Reload the first DAPC result.
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "italy_US", "dapc1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Calculate the optimal number of PCs from the a-score, considering every PC
# retained in dapc1.
optim.a.score(
  dapc1,
  n.pca = 1:ncol(dapc1$tab),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 10
)

## $pop.score
## $pop.score$`1`
##         BEN         BER         BRE         CAM         CES         CHA 
##  0.91666667  0.75000000 -0.06153846  0.50000000  0.40714286  0.10833333 
##         DES         GEL         HAI         HAN         HOC         HUN 
##  0.05000000  0.00000000  0.58333333  1.00000000  0.14285714  0.66666667 
##         IMP         INJ         INW         ITB         ITP         ITR 
##  0.00000000  0.90909091  1.00000000  0.00000000  0.66666667  0.64166667 
##         JAF         KAC         KAG         KAN         KAT         KLP 
##  0.00000000  0.00000000  0.42500000  0.18181818  0.00000000  0.00000000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.00000000  0.00000000  0.66666667  0.75000000  0.20000000 -0.05454545 
##         ROM         SIC         SON         SSK         SUF         SUU 
##  0.00000000  0.22222222  0.00000000  0.36666667  0.00000000  1.00000000 
##         TAI         TRE         UTS         YUN 
##  0.00000000  0.16666667  0.00000000  0.00000000 
## 
## $pop.score$`5`
##         BEN         BER         BRE         CAM         CES         CHA 
##  0.78333333  0.98333333  0.63846154  0.58333333  0.94285714  0.45833333 
##         DES         GEL         HAI         HAN         HOC         HUN 
##  0.61250000 -0.05000000  0.54166667  0.97500000  0.24285714  0.98333333 
##         IMP         INJ         INW         ITB         ITP         ITR 
##  0.20000000  0.98181818  0.90000000  0.10000000  0.87777778  1.00000000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.15000000 -0.05000000  0.91666667  0.60909091  0.00000000 -0.05000000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.87500000 -0.06666667  0.76666667  0.66666667  0.95454545  0.95454545 
##         ROM         SIC         SON         SSK         SUF         SUU 
## -0.07500000  0.43333333 -0.10000000  0.61666667  0.30000000  0.98333333 
##         TAI         TRE         UTS         YUN 
##  0.78571429  0.69166667  0.84166667 -0.02222222 
## 
## $pop.score$`10`
##         BEN         BER         BRE         CAM         CES         CHA 
##  0.91666667  0.97500000  0.82307692  0.64166667  0.82857143  0.74166667 
##         DES         GEL         HAI         HAN         HOC         HUN 
##  0.81250000 -0.30000000  0.65833333  0.82500000  0.60000000  0.91666667 
##         IMP         INJ         INW         ITB         ITP         ITR 
##  0.55000000  0.91818182  0.87500000  0.52000000  0.93333333  0.64166667 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.30000000 -0.08333333  0.92500000  0.63636364 -0.06666667  0.17500000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.82500000 -0.02222222  0.77500000  0.94166667  0.88181818  0.89090909 
##         ROM         SIC         SON         SSK         SUF         SUU 
##  0.42500000  0.37777778 -0.20000000  0.53333333  0.23333333  0.95000000 
##         TAI         TRE         UTS         YUN 
##  0.94285714  0.74166667  0.84166667 -0.03333333 
## 
## $pop.score$`15`
##         BEN         BER         BRE         CAM         CES         CHA 
##  0.91666667  0.87500000  0.76923077  0.60000000  0.83571429  0.65833333 
##         DES         GEL         HAI         HAN         HOC         HUN 
##  0.79375000  0.75000000  0.70833333  0.80000000  0.78571429  0.90000000 
##         IMP         INJ         INW         ITB         ITP         ITR 
##  0.30000000  0.92727273  0.82500000  0.70000000  0.90000000  0.64166667 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.30000000 -0.08333333  0.90000000  0.82727273  0.08333333  0.82500000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.75000000  0.47777778  0.75000000  0.92500000  0.85454545  0.95454545 
##         ROM         SIC         SON         SSK         SUF         SUU 
##  0.40000000  0.37777778 -0.23333333  0.45000000  0.48333333  0.93333333 
##         TAI         TRE         UTS         YUN 
##  0.85714286  0.58333333  0.92500000  0.18888889 
## 
## $pop.score$`20`
##         BEN         BER         BRE         CAM         CES         CHA 
##  0.90833333  0.82500000  0.79230769  0.63333333  0.86428571  0.54166667 
##         DES         GEL         HAI         HAN         HOC         HUN 
##  0.84375000  0.50000000  0.45833333  0.80000000  0.88571429  0.77500000 
##         IMP         INJ         INW         ITB         ITP         ITR 
##  0.25000000  0.88181818  0.75000000  0.58000000  0.90000000  0.75833333 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.30000000  0.18333333  0.88333333  0.84545455  0.50000000  0.75000000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.77500000  0.57777778  0.74166667  0.89166667  0.90909091  0.82727273 
##         ROM         SIC         SON         SSK         SUF         SUU 
##  0.37500000  0.43333333 -0.06666667  0.27500000  0.61666667  0.81666667 
##         TAI         TRE         UTS         YUN 
##  0.85714286  0.74166667  0.93333333  0.11111111 
## 
## $pop.score$`25`
##        BEN        BER        BRE        CAM        CES        CHA        DES 
##  0.8500000  0.8583333  0.8076923  0.5916667  0.8071429  0.4083333  0.8000000 
##        GEL        HAI        HAN        HOC        HUN        IMP        INJ 
##  0.5500000  0.3833333  0.6750000  0.8000000  0.8666667  0.1750000  0.8363636 
##        INW        ITB        ITP        ITR        JAF        KAC        KAG 
##  0.7500000  0.6200000  0.8222222  0.8083333 -0.4500000  0.1833333  0.8083333 
##        KAN        KAT        KLP        KUN        LAM        MAT        OKI 
##  0.8454545  0.4666667  0.6250000  0.6250000  0.8000000  0.6333333  0.8666667 
##        PAL        QNC        ROM        SIC        SON        SSK        SUF 
##  0.7454545  0.8909091  0.7000000  0.4222222  0.1333333  0.4583333  0.6166667 
##        SUU        TAI        TRE        UTS        YUN 
##  0.6500000  0.8428571  0.7083333  0.8750000  0.1444444 
## 
## $pop.score$`30`
##        BEN        BER        BRE        CAM        CES        CHA        DES 
##  0.8500000  0.8666667  0.7615385  0.5833333  0.8857143  0.2750000  0.8250000 
##        GEL        HAI        HAN        HOC        HUN        IMP        INJ 
##  0.4500000  0.6666667  0.6250000  0.7142857  0.7250000  0.0750000  0.8636364 
##        INW        ITB        ITP        ITR        JAF        KAC        KAG 
##  0.7000000  0.4600000  0.8444444  0.7166667 -0.0500000  0.5166667  0.8583333 
##        KAN        KAT        KLP        KUN        LAM        MAT        OKI 
##  0.8181818  0.4166667  0.6250000  0.5750000  0.7000000  0.7166667  0.7833333 
##        PAL        QNC        ROM        SIC        SON        SSK        SUF 
##  0.8454545  0.7454545  0.6250000  0.6222222  0.2666667  0.4500000  0.5333333 
##        SUU        TAI        TRE        UTS        YUN 
##  0.7333333  0.6571429  0.7500000  0.8416667  0.2333333 
## 
## $pop.score$`35`
##        BEN        BER        BRE        CAM        CES        CHA        DES 
## 0.77500000 0.80000000 0.72307692 0.55833333 0.74285714 0.63333333 0.79375000 
##        GEL        HAI        HAN        HOC        HUN        IMP        INJ 
## 0.45000000 0.69166667 0.52500000 0.74285714 0.80833333 0.45000000 0.70000000 
##        INW        ITB        ITP        ITR        JAF        KAC        KAG 
## 0.67500000 0.40000000 0.77777778 0.65833333 0.35000000 0.20000000 0.76666667 
##        KAN        KAT        KLP        KUN        LAM        MAT        OKI 
## 0.76363636 0.30000000 0.55000000 0.60000000 0.72222222 0.59166667 0.82500000 
##        PAL        QNC        ROM        SIC        SON        SSK        SUF 
## 0.80000000 0.74545455 0.57500000 0.62222222 0.03333333 0.35000000 0.51666667 
##        SUU        TAI        TRE        UTS        YUN 
## 0.63333333 0.65714286 0.72500000 0.84166667 0.33333333 
## 
## $pop.score$`40`
##       BEN       BER       BRE       CAM       CES       CHA       DES       GEL 
## 0.6083333 0.8333333 0.7538462 0.5416667 0.7785714 0.6583333 0.8125000 0.3000000 
##       HAI       HAN       HOC       HUN       IMP       INJ       INW       ITB 
## 0.7500000 0.5250000 0.7285714 0.7750000 0.3000000 0.6818182 0.6500000 0.6600000 
##       ITP       ITR       JAF       KAC       KAG       KAN       KAT       KLP 
## 0.7222222 0.5750000 0.3500000 0.2666667 0.7500000 0.8545455 0.2333333 0.5750000 
##       KUN       LAM       MAT       OKI       PAL       QNC       ROM       SIC 
## 0.5250000 0.7000000 0.5583333 0.8083333 0.7545455 0.7909091 0.2000000 0.6333333 
##       SON       SSK       SUF       SUU       TAI       TRE       UTS       YUN 
## 0.4333333 0.3500000 0.4833333 0.5666667 0.6428571 0.7833333 0.7666667 0.4555556 
## 
## $pop.score$`45`
##       BEN       BER       BRE       CAM       CES       CHA       DES       GEL 
## 0.6333333 0.8166667 0.7692308 0.5416667 0.7714286 0.6416667 0.7312500 0.3000000 
##       HAI       HAN       HOC       HUN       IMP       INJ       INW       ITB 
## 0.6500000 0.5250000 0.7000000 0.7333333 0.2000000 0.7818182 0.6250000 0.5800000 
##       ITP       ITR       JAF       KAC       KAG       KAN       KAT       KLP 
## 0.6888889 0.5833333 0.4000000 0.2833333 0.7250000 0.7454545 0.2833333 0.5000000 
##       KUN       LAM       MAT       OKI       PAL       QNC       ROM       SIC 
## 0.5250000 0.7000000 0.4583333 0.7916667 0.7727273 0.7636364 0.4000000 0.5777778 
##       SON       SSK       SUF       SUU       TAI       TRE       UTS       YUN 
## 0.4000000 0.3000000 0.4666667 0.5833333 0.6142857 0.7833333 0.8166667 0.3777778 
## 
## $pop.score$`50`
##       BEN       BER       BRE       CAM       CES       CHA       DES       GEL 
## 0.5833333 0.7750000 0.7846154 0.4333333 0.8000000 0.7333333 0.6937500 0.3000000 
##       HAI       HAN       HOC       HUN       IMP       INJ       INW       ITB 
## 0.6500000 0.5000000 0.6714286 0.7750000 0.1250000 0.7000000 0.3000000 0.5600000 
##       ITP       ITR       JAF       KAC       KAG       KAN       KAT       KLP 
## 0.5555556 0.5583333 0.3000000 0.2166667 0.7166667 0.7818182 0.1166667 0.3750000 
##       KUN       LAM       MAT       OKI       PAL       QNC       ROM       SIC 
## 0.5000000 0.7222222 0.4916667 0.7500000 0.6181818 0.7727273 0.4000000 0.5777778 
##       SON       SSK       SUF       SUU       TAI       TRE       UTS       YUN 
## 0.4666667 0.1083333 0.3000000 0.6333333 0.6285714 0.6916667 0.7250000 0.4222222 
## 
## $pop.score$`55`
##       BEN       BER       BRE       CAM       CES       CHA       DES       GEL 
## 0.6000000 0.7583333 0.6769231 0.4083333 0.7428571 0.7083333 0.7375000 0.3500000 
##       HAI       HAN       HOC       HUN       IMP       INJ       INW       ITB 
## 0.6500000 0.3250000 0.6142857 0.6666667 0.1750000 0.7363636 0.3500000 0.4200000 
##       ITP       ITR       JAF       KAC       KAG       KAN       KAT       KLP 
## 0.6222222 0.5083333 0.2500000 0.3166667 0.7500000 0.6000000 0.1666667 0.3750000 
##       KUN       LAM       MAT       OKI       PAL       QNC       ROM       SIC 
## 0.3750000 0.6444444 0.4500000 0.7250000 0.6272727 0.6363636 0.4750000 0.4777778 
##       SON       SSK       SUF       SUU       TAI       TRE       UTS       YUN 
## 0.5000000 0.1000000 0.3833333 0.5166667 0.7142857 0.6916667 0.7333333 0.4555556 
## 
## $pop.score$`60`
##           BEN           BER           BRE           CAM           CES 
##  5.083333e-01  7.083333e-01  7.230769e-01  3.500000e-01  6.571429e-01 
##           CHA           DES           GEL           HAI           HAN 
##  6.583333e-01  6.312500e-01  2.500000e-01  6.083333e-01  4.000000e-01 
##           HOC           HUN           IMP           INJ           INW 
##  5.571429e-01  7.250000e-01  7.500000e-02  6.909091e-01  4.750000e-01 
##           ITB           ITP           ITR           JAF           KAC 
##  5.200000e-01  6.111111e-01  4.583333e-01  2.000000e-01  4.000000e-01 
##           KAG           KAN           KAT           KLP           KUN 
##  7.500000e-01  5.818182e-01  1.666667e-01  4.250000e-01  3.750000e-01 
##           LAM           MAT           OKI           PAL           QNC 
##  6.444444e-01  4.583333e-01  6.750000e-01  6.181818e-01  6.363636e-01 
##           ROM           SIC           SON           SSK           SUF 
##  4.500000e-01  5.333333e-01  3.666667e-01 -1.665091e-17  4.333333e-01 
##           SUU           TAI           TRE           UTS           YUN 
##  5.333333e-01  5.000000e-01  6.750000e-01  5.750000e-01  4.111111e-01 
## 
## 
## $mean
##         1         5        10        15        20        25        30        35 
## 0.3051345 0.5409070 0.5817042 0.6404075 0.6406431 0.6250357 0.6287852 0.6101916 
##        40        45        50        55        60 
## 0.6033986 0.5885236 0.5453468 0.5253546 0.5003971 
## 
## $pred
## $pred$x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
## 
## $pred$y
##  [1] 0.3337460 0.3781713 0.4210422 0.4607936 0.4958603 0.5250879 0.5489646
##  [8] 0.5683894 0.5842613 0.5974795 0.6087986 0.6183976 0.6263111 0.6325738
## [15] 0.6372203 0.6403146 0.6420375 0.6425993 0.6422102 0.6410803 0.6394157
## [22] 0.6374067 0.6352392 0.6330995 0.6311736 0.6295918 0.6282608 0.6270314
## [29] 0.6257544 0.6242807 0.6225021 0.6204748 0.6182960 0.6160631 0.6138732
## [36] 0.6117901 0.6097433 0.6076287 0.6053423 0.6027801 0.5998435 0.5964568
## [43] 0.5925494 0.5880511 0.5828914 0.5770513 0.5707169 0.5641257 0.5575151
## [50] 0.5511224 0.5451325 0.5395197 0.5342056 0.5291121 0.5241607 0.5192840
## [57] 0.5144585 0.5096713 0.5049098 0.5001612
## 
## 
## $best
## [1] 18
#Optimal number = 18

Run DAPC with the optimal number of PCs (18)

# Final DAPC on the scaled data, grouped by population, keeping 18 PCs (the
# a-score optimum) and 18 discriminant functions.
dapc_snp <- dapc(
  snp3,
  grp = populations,
  n.pca = 18,
  n.da = 18
)

Save it

# Write the final DAPC result to disk.
saveRDS(
  object = dapc_snp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "italy_US", "dapc_snp.rds"
  )
)

To load it

# Reload the final DAPC result.
dapc_snp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "italy_US", "dapc_snp.rds"
  )
)

Check R symbols for plot

# Preview the available plotting symbols, to help choose the shapes used in
# the DAPC plots.
# Candidate symbol codes to display.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)
# Use exactly one factor level per candidate shape. Requesting more levels
# than there are shapes (previously 100) pads the manual scale with NA, and
# every point in those levels is dropped with a "Removed ... rows" warning.
N <- length(good.shapes)
M <- 1000
# Random points, each assigned to one of the N shape levels.
foo <- data.frame(
  x = rnorm(M),
  y = rnorm(M),
  s = factor(sample(seq_len(N), M, replace = TRUE))
)
ggplot(foo, aes(x, y, shape = s)) +
  scale_shape_manual(values = good.shapes) +
  geom_point()
## Warning: Removed 679 rows containing missing values or values outside the scale range
## (`geom_point()`).

# 1:25,28:31,36,55:57  (presumably an earlier candidate set of shape codes)
# Colour palette for the per-population DAPC plots: 39 colours.
# NOTE(review): pop(snp2) reports 40 levels further down; if dapc_snp also has
# 40 groups the palette is one colour short and a colour would be reused.
# Confirm that every group gets its own colour.
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b",  "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")

Plot using different discriminant functions PCs 1 & 2

# Quick on-screen look at discriminant functions 1 (x axis) and 2 (y axis),
# with the discriminant-analysis eigenvalue inset switched on.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  col = myCol, pch = 20, bg = "white",
  cex = 1, cex.lab = 0.1,
  scree.da = TRUE
)

# Write the discriminant-function 1 vs 2 scatter to a 7 x 7 inch PDF.
pdf(
  file = "scripts/RMarkdowns/output/euro_global/dapc/italy_US/dapc_euro_global_r1_PC1_2.pdf",
  width = 7,   # plot width in inches
  height = 7   # plot height in inches
)

# Plotting symbols, one per group.
good.shapes <- c(1:25, 53:84)

# Shrink text/symbols on this device; the device is closed by dev.off() below.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol,
  cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE,
  legend = TRUE, posi.leg = "topleft",
  # Legend size is `cleg` in scatter.dapc; the former `cex.leg` was forwarded
  # to the low-level plot call and only triggered
  # '"cex.leg" is not a graphical parameter'.
  cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)

dev.off()
# Same figure as the PDF version, drawn on the active device.
good.shapes <- c(1:25, 53:84)

op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol,
  cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE,
  legend = TRUE, posi.leg = "topleft",
  # Legend size is `cleg` in scatter.dapc; `cex.leg` is not an argument and
  # was only passed through to the low-level plot call, which warned about it.
  cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the text size that was active before this plot
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter

4.2 Plot grouped by country (instead of pop)

Import Sample Locations

# Sampling-site metadata (coordinates, region, year, ordering columns).
sampling_loc <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "lea", "sampling_loc_italy_native_temporal.rds"
  )
)
head(sampling_loc)
##    Pop_City  Location Latitude Longitude Continent Abbreviation Year
## 1   Brescia   Brescia 45.53373 10.204450    Europe          BRE 1995
## 2    Cesena    Cesena 44.15287 12.244265    Europe          CES 1995
## 3 Desenzano Desenzano 45.46289 10.549140    Europe          DES 1995
## 4   Bologna   Bologna 44.48478 11.366584    Europe          ITB 2017
## 5   Imperia   Imperia 43.87159  8.003559    Europe          IMP 2017
## 6    Puglia    Puglia 41.12213 16.844107    Europe          ITP 2016
##           Region   Subregion order order2 orderold
## 1   Italy (1995) West Europe    20     12       12
## 2   Italy (1995) West Europe    24     16       16
## 3   Italy (1995) West Europe    21     13       13
## 4 Italy (modern) West Europe    23     15       15
## 5 Italy (modern) West Europe    18     10       10
## 6 Italy (modern) West Europe    28     20       20
# Copy the metadata returned by other(snp2) into the strata slot.
strata(snp2) <- data.frame(other(snp2))

# Populations are currently the sampling-site codes (e.g. OKI), not countries
head(pop(snp2)) 
## [1] OKI OKI OKI OKI OKI OKI
## 40 Levels: BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ... YUN

Load the csv

# Lookup table with a `pop` (site code) and a `country` column; it appears to
# hold one row per individual, in the same order as the genind object.
countr <- read.csv(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "italy_US", "DAPC_countries_italy_US.csv"
  )
)
df <- as.data.frame(countr)

head(df)
##   pop country
## 1 OKI   Japan
## 2 OKI   Japan
## 3 OKI   Japan
## 4 OKI   Japan
## 5 OKI   Japan
## 6 OKI   Japan
# Relabel every individual with its country so later analyses group by country.
# The pop<- accessor is used instead of writing the @pop slot directly: the
# accessor rejects a vector whose length differs from the number of
# individuals, whereas a raw slot write would silently leave an invalid object.
stopifnot(nrow(df) == nInd(snp2))
pop(snp2) <- df$country
pop(snp2)
##   [1] Japan           Japan           Japan           Japan          
##   [5] Japan           Japan           Japan           Japan          
##   [9] Japan           Japan           Japan           Japan          
##  [13] China           China           China           China          
##  [17] China           China           China           China          
##  [21] China           China           China           China          
##  [25] China           China           China           China          
##  [29] China           China           China           China          
##  [33] China           Nepal           Nepal           Nepal          
##  [37] Nepal           Taiwan          Taiwan          Taiwan         
##  [41] Taiwan          Taiwan          Taiwan          Taiwan         
##  [45] China           China           China           China          
##  [49] China           China           China           China          
##  [53] China           China           China           China          
##  [57] Vietnam         Vietnam         Vietnam         Vietnam        
##  [61] Trentino        Trentino        Trentino        Trentino       
##  [65] Trentino        Trentino        Trentino        Trentino       
##  [69] Trentino        Trentino        Trentino        Trentino       
##  [73] Malaysia        Malaysia        Malaysia        Malaysia       
##  [77] Sicilia         Sicilia         Sicilia         Sicilia        
##  [81] Sicilia         Sicilia         Sicilia         Sicilia        
##  [85] Sicilia         Vietnam         Vietnam         Vietnam        
##  [89] Vietnam         Vietnam         Vietnam         Vietnam        
##  [93] Vietnam         Vietnam         Vietnam         Vietnam        
##  [97] Vietnam         Vietnam         Vietnam         Vietnam        
## [101] Vietnam         Vietnam         Vietnam         Thailand       
## [105] Thailand        Thailand        Thailand        Thailand       
## [109] Thailand        Thailand        Thailand        Thailand       
## [113] Thailand        Thailand        Thailand        Thailand       
## [117] Thailand        Thailand        Thailand        Thailand       
## [121] Thailand        Thailand        Thailand        Thailand       
## [125] Thailand        Brescia         Brescia         Brescia        
## [129] Desenzano       Desenzano       Cesena          Cesena         
## [133] Japan           Japan           Japan           Japan          
## [137] Japan           Japan           Japan           Japan          
## [141] Japan           Japan           Japan           Japan          
## [145] Japan           Japan           Japan           Japan          
## [149] Japan           Japan           Japan           Japan          
## [153] Japan           Japan           Japan           Brescia        
## [157] Brescia         Brescia         Brescia         Brescia        
## [161] Brescia         Brescia         Brescia         Brescia        
## [165] Brescia         Desenzano       Desenzano       Desenzano      
## [169] Desenzano       Desenzano       Desenzano       Desenzano      
## [173] Cesena          Cesena          Cesena          Cesena         
## [177] Cesena          Cesena          Cesena          Imperia        
## [181] Imperia         Imperia         Imperia         Rome (Sapienza)
## [185] Rome (Sapienza) Rome (Sapienza) Rome (Sapienza) Cambodia       
## [189] Cambodia        Cambodia        Cambodia        Cambodia       
## [193] Cambodia        Japan           Cambodia        Cambodia       
## [197] Cambodia        Cambodia        Cambodia        Cambodia       
## [201] Cesena          Cesena          Cesena          Japan          
## [205] Cesena          Cesena          Desenzano       Desenzano      
## [209] Desenzano       Japan           Japan           Desenzano      
## [213] Desenzano       Desenzano       Desenzano       Japan          
## [217] Japan           Japan           Japan           Japan          
## [221] Japan           Japan           Japan           India          
## [225] India           India           India           India          
## [229] India           India           India           India          
## [233] India           India           India           Thailand       
## [237] Thailand        Thailand        Thailand        Thailand       
## [241] Thailand        Thailand        Thailand        Thailand       
## [245] USA             USA             USA             USA            
## [249] USA             USA             USA             USA            
## [253] USA             USA             USA             USA            
## [257] USA             USA             USA             USA            
## [261] USA             USA             USA             USA            
## [265] USA             USA             USA             Bhutan         
## [269] Bhutan          Nepal           Nepal           Sri Lanka      
## [273] Sri Lanka       Thailand        Thailand        Thailand       
## [277] Thailand        Thailand        Thailand        Thailand       
## [281] Thailand        Thailand        Thailand        Thailand       
## [285] Indonesia       Indonesia       Indonesia       Indonesia      
## [289] Indonesia       Indonesia       Indonesia       Indonesia      
## [293] Indonesia       Indonesia       Indonesia       Indonesia      
## [297] Maldives        Maldives        Maldives        Maldives       
## [301] Bologna         Bologna         Bologna         Bologna        
## [305] Bologna         Puglia          Puglia          Puglia         
## [309] Puglia          Puglia          Puglia          Puglia         
## [313] Puglia          Puglia          Rome (Trappola) Rome (Trappola)
## [317] Rome (Trappola) Rome (Trappola) Rome (Trappola) Rome (Trappola)
## [321] Rome (Trappola) Rome (Trappola) Rome (Trappola) Rome (Trappola)
## [325] Rome (Trappola) Rome (Trappola) Indonesia       Indonesia      
## [329] Indonesia       Indonesia       Indonesia       Indonesia      
## [333] Indonesia       Indonesia       Indonesia       Indonesia      
## [337] Indonesia       Indonesia       Indonesia       Indonesia      
## [341] Indonesia       Malaysia        Malaysia        Malaysia       
## [345] Malaysia        Malaysia        Malaysia        Malaysia       
## [349] Malaysia        Malaysia        Malaysia        Malaysia       
## [353] Malaysia       
## 24 Levels: Bhutan Bologna Brescia Cambodia Cesena China Desenzano ... Vietnam

Save the genind object

# Persist the country-labelled genind object.
saveRDS(
  object = snp2,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "italy_US", "snp_country.rds"
  )
)

Load the genind object

# Reload the country-labelled genind object under a new name.
snp_country <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "italy_US", "snp_country.rds"
  )
)

Scale

# Scale the allele data, replacing missing values with the mean.
# NOTE(review): scaleGen() is documented to return a numeric matrix, so after
# this line snp_country would no longer be a genind object. Confirm, because
# the printed output below still shows a genind.
snp_country <- scaleGen(snp_country, NA.method="mean")
# Inspect what kind of object came back and its dimensions.
class(snp_country)
dim(snp_country)
## NULL
snp_country[1:5,1:5]
## /// GENIND OBJECT /////////
## 
##  // 5 individuals; 3 loci; 5 alleles; size: 11.5 Kb
## 
##  // Basic content
##    @tab:  5 x 5 matrix of allele counts
##    @loc.n.all: number of alleles per locus (range: 1-2)
##    @loc.fac: locus factor for the 5 columns of @tab
##    @all.names: list of allele names for each locus
##    @ploidy: ploidy of each individual  (range: 2-2)
##    @type:  codom
##    @call: .local(x = x, i = i, j = j, drop = drop)
## 
##  // Optional content
##    @pop: population of each individual (group size range: 5-5)
##    @strata: a data frame with 4 columns ( sex, phenotype, pat, mat )
##    @other: a list containing: sex  phenotype  pat  mat
# Get the group labels from the genind object snp2 (its pop slot was set to
# the country labels above); used as `grp` in the final DAPC.
populations <- snp2$pop

4.2.1 Find clusters

# Infer genetic clusters de novo with find.clusters().
# n.pca and n.clust are the values previously typed at the interactive prompts
# (350 PCs retained, 4 clusters chosen). Passing them explicitly makes the
# step reproducible and lets it run unattended, e.g. when knitting.
# Drop both arguments to get the interactive diagnostic plots back.
grp <- find.clusters(
  snp_country,
  max.n.clust = 10,
  n.pca = 350,  # PCs retained
  n.clust = 4   # number of clusters chosen
)

Save the find.clusters result

# Persist the clustering result.
# NOTE(review): this is written to MAF_1/ whereas snp_country.rds goes to
# MAF_1/italy_US/. Confirm it cannot overwrite a grp_country1.rds produced by
# another analysis.
saveRDS(
  object = grp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "grp_country1.rds"
  )
)

Load the find.clusters result

# Reload the clustering result.
grp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "grp_country1.rds"
  )
)
# Initial DAPC on the inferred clusters. n.pca and n.da are the answers
# previously given at the interactive prompts (300 PCs, 5 discriminant
# functions); supplying them explicitly makes the call reproducible and
# non-interactive.
dapc_country_1 <- dapc(snp_country, grp$grp, n.pca = 300, n.da = 5)

Save the DAPC object

# Persist the initial DAPC fit.
saveRDS(
  object = dapc_country_1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "dapc_country_1.rds"
  )
)

Load the DAPC object

# Reload the initial DAPC fit.
dapc_country_1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "dapc_country_1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Calculate the optimal number of PCs to retain, searching over every PC kept
# in the initial fit. seq_len() is used instead of 1:ncol() so the range
# cannot become c(1, 0) if no PCs are present.
optim.a.score(
  dapc_country_1,
  n.pca = seq_len(ncol(dapc_country_1$tab)),
  smart = TRUE,
  n = 10,
  n.sim = 10,
  plot = TRUE
)

## $pop.score
## $pop.score$`1`
##         1         2         3         4 
## 0.0000000 0.8078431 0.3684211 0.5814815 
## 
## $pop.score$`50`
##         1         2         3         4 
## 0.7034483 0.5274510 0.4657895 0.4703704 
## 
## $pop.score$`100`
##         1         2         3         4 
## 0.4551724 0.3862745 0.3938596 0.3962963 
## 
## $pop.score$`150`
##         1         2         3         4 
## 0.2310345 0.2774510 0.2640351 0.2361111 
## 
## $pop.score$`200`
##         1         2         3         4 
## 0.1758621 0.1833333 0.1675439 0.1907407 
## 
## $pop.score$`250`
##         1         2         3         4 
## 0.1379310 0.1294118 0.1254386 0.1342593 
## 
## $pop.score$`300`
##          1          2          3          4 
## 0.04482759 0.06274510 0.06491228 0.04814815 
## 
## 
## $mean
##          1         50        100        150        200        250        300 
## 0.43943642 0.54176478 0.40790072 0.25215792 0.17937000 0.13176016 0.05515828 
## 
## $pred
## $pred$x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300
## 
## $pred$y
##   [1] 0.43943642 0.44272601 0.44601287 0.44929395 0.45256626 0.45582676
##   [7] 0.45907244 0.46230027 0.46550724 0.46869033 0.47184652 0.47497279
##  [13] 0.47806612 0.48112348 0.48414187 0.48711825 0.49004961 0.49293294
##  [19] 0.49576520 0.49854339 0.50126448 0.50392545 0.50652328 0.50905496
##  [25] 0.51151746 0.51390776 0.51622284 0.51845969 0.52061528 0.52268660
##  [31] 0.52467062 0.52656433 0.52836470 0.53006872 0.53167336 0.53317561
##  [37] 0.53457245 0.53586085 0.53703780 0.53810028 0.53904527 0.53986975
##  [43] 0.54057069 0.54114508 0.54158991 0.54190214 0.54207876 0.54211676
##  [49] 0.54201310 0.54176478 0.54136980 0.54083034 0.54014961 0.53933082
##  [55] 0.53837717 0.53729187 0.53607812 0.53473915 0.53327815 0.53169833
##  [61] 0.53000289 0.52819506 0.52627803 0.52425502 0.52212923 0.51990386
##  [67] 0.51758214 0.51516725 0.51266242 0.51007085 0.50739575 0.50464033
##  [73] 0.50180779 0.49890134 0.49592419 0.49287954 0.48977062 0.48660061
##  [79] 0.48337274 0.48009020 0.47675621 0.47337398 0.46994671 0.46647761
##  [85] 0.46296988 0.45942675 0.45585140 0.45224706 0.44861693 0.44496421
##  [91] 0.44129212 0.43760386 0.43390264 0.43019167 0.42647416 0.42275331
##  [97] 0.41903234 0.41531444 0.41160283 0.40790072 0.40421088 0.40053438
## [103] 0.39687184 0.39322391 0.38959122 0.38597440 0.38237408 0.37879089
## [109] 0.37522548 0.37167847 0.36815049 0.36464218 0.36115418 0.35768711
## [115] 0.35424161 0.35081831 0.34741785 0.34404086 0.34068797 0.33735981
## [121] 0.33405702 0.33078023 0.32753008 0.32430720 0.32111222 0.31794577
## [127] 0.31480849 0.31170101 0.30862397 0.30557800 0.30256372 0.29958178
## [133] 0.29663281 0.29371744 0.29083630 0.28799003 0.28517926 0.28240462
## [139] 0.27966675 0.27696628 0.27430384 0.27168008 0.26909561 0.26655107
## [145] 0.26404710 0.26158433 0.25916339 0.25678492 0.25444955 0.25215792
## [151] 0.24991042 0.24770658 0.24554569 0.24342704 0.24134992 0.23931362
## [157] 0.23731744 0.23536066 0.23344257 0.23156247 0.22971965 0.22791340
## [163] 0.22614300 0.22440776 0.22270696 0.22103990 0.21940586 0.21780413
## [169] 0.21623401 0.21469479 0.21318576 0.21170621 0.21025543 0.20883272
## [175] 0.20743736 0.20606864 0.20472586 0.20340831 0.20211527 0.20084605
## [181] 0.19959992 0.19837619 0.19717414 0.19599307 0.19483226 0.19369101
## [187] 0.19256861 0.19146434 0.19037751 0.18930739 0.18825329 0.18721450
## [193] 0.18619030 0.18517998 0.18418284 0.18319817 0.18222526 0.18126340
## [199] 0.18031189 0.17937000 0.17843706 0.17751248 0.17659568 0.17568608
## [205] 0.17478313 0.17388625 0.17299486 0.17210840 0.17122629 0.17034797
## [211] 0.16947286 0.16860039 0.16772999 0.16686109 0.16599311 0.16512550
## [217] 0.16425766 0.16338904 0.16251907 0.16164716 0.16077276 0.15989529
## [223] 0.15901417 0.15812884 0.15723872 0.15634325 0.15544186 0.15453396
## [229] 0.15361900 0.15269639 0.15176557 0.15082598 0.14987702 0.14891814
## [235] 0.14794877 0.14696833 0.14597625 0.14497195 0.14395488 0.14292446
## [241] 0.14188011 0.14082126 0.13974736 0.13865781 0.13755205 0.13642952
## [247] 0.13528964 0.13413183 0.13295553 0.13176016 0.13054532 0.12931124
## [253] 0.12805831 0.12678693 0.12549748 0.12419037 0.12286598 0.12152471
## [259] 0.12016695 0.11879309 0.11740354 0.11599868 0.11457890 0.11314460
## [265] 0.11169618 0.11023402 0.10875852 0.10727008 0.10576908 0.10425592
## [271] 0.10273099 0.10119470 0.09964742 0.09808955 0.09652150 0.09494364
## [277] 0.09335638 0.09176010 0.09015521 0.08854209 0.08692114 0.08529275
## [283] 0.08365732 0.08201523 0.08036689 0.07871268 0.07705300 0.07538824
## [289] 0.07371880 0.07204506 0.07036743 0.06868630 0.06700205 0.06531509
## [295] 0.06362580 0.06193458 0.06024183 0.05854793 0.05685328 0.05515828
## 
## 
## $best
## [1] 48
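#Optimal number = 47 (value used below; the run printed above reports $best = 48 - the a-score is simulation-based, so the optimum can shift slightly between runs)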

Run DAPC with object

# Final DAPC grouped by `populations`, keeping 47 PCs and 5 discriminant
# functions.
dapc_snp_country <- dapc(
  snp_country,
  grp   = populations,
  n.pca = 47,
  n.da  = 5
)

Save it

# Cache the final DAPC fit on disk.
saveRDS(
  object = dapc_snp_country,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "dapc_snp_country.rds"
  )
)

To load it

# Reload the cached final DAPC fit.
dapc_snp_country <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global",
    "dapc", "MAF_1", "dapc_snp_country.rds"
  )
)

Plot with new colors - by region

# Discriminant functions 1 vs 2 of the country-level DAPC, written to a
# 7 x 7 inch PDF.
pdf(
  file = "scripts/RMarkdowns/output/euro_global/dapc/italy_US/dapc_snp_country2_euro_global_PC1_2.pdf",
  width = 7,
  height = 7
)

# Plotting symbols, one per group.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# 24 colours, one per group level; several groups share a colour
# (presumably groups from the same region).
myCol2 <- c(
  "#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C",
  "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9",
  "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9"
)

op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE,
  legend = TRUE, posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)

dev.off()
# Same figure as the PDF version (discriminant functions 1 vs 2), drawn on the
# active device.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# 24 colours, one per group level; several groups share a colour
# (presumably groups from the same region).
myCol2 <- c(
  "#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C",
  "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9",
  "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9"
)

op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE,
  legend = TRUE, posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the text size that was active before this plot

Plot with new colors - by region (discriminant functions 1 & 3)

# Discriminant functions 1 vs 3 of the country-level DAPC, written to a
# 7 x 7 inch PDF.
pdf(
  file = "scripts/RMarkdowns/output/euro_global/dapc/italy_US/dapc_snp_country2_euro_global_PC1_3.pdf",
  width = 7,
  height = 7
)

# Plotting symbols, one per group.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# 24 colours, one per group level; several groups share a colour
# (presumably groups from the same region).
myCol2 <- c(
  "#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C",
  "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9",
  "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9"
)

op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE,
  legend = TRUE, posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)

dev.off()
# Same figure as the PDF version (discriminant functions 1 vs 3), drawn on the
# active device.
good.shapes <- c(1:25, 35:38, 43, 60, 62:64)

# 24 colours, one per group level; several groups share a colour
# (presumably groups from the same region).
myCol2 <- c(
  "#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C",
  "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9",
  "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9"
)

op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE,
  legend = TRUE, posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)  # restore the text size that was active before this plot

5. DAPC for native_albania_croatia_greece_US

Set 3 Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)

5.1 Create files

# Subset the r2_0.01_b SNP set to the families listed in
# native_albania_croatia_greece_US.txt and write a new binary fileset
# (bed/bim/fam) under output/dapc/MAF_1/.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_albania_croatia_greece_US.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_albania_croatia_greece_US \
--silent;
# PLINK runs with --silent, so pull the sample/variant counts from its log.
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_albania_croatia_greece_US.log

22642 variants loaded from .bim file. --keep-fam: 314 people remaining. Total genotyping rate in remaining samples is 0.968879. 22642 variants and 314 people pass filters and QC.

Convert to raw format

# Export the subset as a .raw file (--recodeA), the format read by
# adegenet's read.PLINK(). The same --out prefix is reused, so this run
# overwrites the .log written by the previous step.
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_albania_croatia_greece_US \
--recodeA \
--out output/dapc/MAF_1/dapc_albania_croatia_greece_US \
--silent;
# PLINK runs with --silent, so pull the sample/variant counts from its log.
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_albania_croatia_greece_US.log

22642 variants loaded from .bim file. 22642 variants and 314 people pass filters and QC.

Import the data and convert it to genind format

# Read the PLINK .raw export into a genlight object, in chunks of 1000 and
# on 4 cores when the parallel package can be loaded.
snp <- read.PLINK(
  file = here("euro_global/output/dapc/MAF_1/dapc_albania_croatia_greece_US.raw"),
  chunkSize = 1000,
  parallel = require("parallel"),
  n.cores = 4,
  quiet = FALSE
)
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
nInd(snp)
## [1] 314
nLoc(snp)
## [1] 22642
nPop(snp)
## [1] 36
indNames(snp)
##   [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
##  [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
##  [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
##  [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
##  [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
##  [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
##  [61] "1214" "1215" "1216" "1217" "1238" "1239" "1240" "1241" "1242" "1243"
##  [71] "1244" "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254"
##  [81] "1255" "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264"
##  [91] "1265" "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276"
## [101] "1282" "1283" "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331"
## [111] "1332" "1333" "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377"
## [121] "1378" "1379" "1380" "1381" "1382" "1383" "1384" "193"  "194"  "195" 
## [131] "196"  "2174" "2175" "2176" "2177" "2178" "2179" "217"  "2180" "2181"
## [141] "2182" "2183" "2184" "2185" "218"  "219"  "2202" "220"  "221"  "222" 
## [151] "223"  "224"  "225"  "226"  "227"  "230"  "255"  "256"  "257"  "258" 
## [161] "261"  "262"  "263"  "264"  "265"  "266"  "267"  "268"  "269"  "270" 
## [171] "271"  "272"  "273"  "275"  "276"  "277"  "278"  "294"  "295"  "296" 
## [181] "297"  "298"  "299"  "301"  "302"  "303"  "304"  "305"  "435"  "436" 
## [191] "437"  "438"  "439"  "440"  "441"  "442"  "443"  "444"  "445"  "446" 
## [201] "602"  "603"  "604"  "607"  "609"  "610"  "623"  "624"  "625"  "626" 
## [211] "627"  "628"  "629"  "630"  "631"  "632"  "633"  "666"  "669"  "670" 
## [221] "671"  "672"  "673"  "674"  "675"  "676"  "677"  "678"  "679"  "680" 
## [231] "681"  "682"  "683"  "711"  "712"  "713"  "714"  "715"  "716"  "717" 
## [241] "718"  "719"  "720"  "721"  "722"  "723"  "724"  "725"  "726"  "727" 
## [251] "728"  "729"  "730"  "731"  "732"  "733"  "735"  "736"  "737"  "741" 
## [261] "742"  "743"  "744"  "745"  "746"  "801"  "802"  "803"  "804"  "805" 
## [271] "806"  "807"  "808"  "809"  "810"  "916"  "917"  "918"  "919"  "920" 
## [281] "921"  "922"  "923"  "924"  "925"  "926"  "927"  "964"  "965"  "966" 
## [291] "967"  "972"  "973"  "975"  "976"  "977"  "978"  "979"  "980"  "981" 
## [301] "982"  "983"  "984"  "985"  "986"  "987"  "988"  "989"  "990"  "991" 
## [311] "992"  "993"  "994"  "995"
# Convert the genlight object to a genind object with dartR's gl2gi();
# probar = TRUE prints the text progress bar shown below.
snp2 <- gl2gi(snp, probar = TRUE, verbose = NULL)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

5.2 Find clusters

# Identify genetic clusters in snp3, trying at most 10 clusters.
# Answers given at the interactive prompts in the recorded run:
#   number of PCs retained: 300
#   number of clusters (>= 2): 4
grp <- find.clusters(
  x = snp3,
  max.n.clust = 10
)

Save it

# Write the clustering result (grp) to an .rds file under the project root
# resolved by here().
saveRDS(
  object = grp,
  file = here(
    "euro_global/output/dapc/MAF_1/grp_albania_croatia_greece_US.rds"
  )
)

To load it

# Restore the saved clustering result into grp from the same .rds file.
grp <- readRDS(
  file = here(
    "euro_global/output/dapc/MAF_1/grp_albania_croatia_greece_US.rds"
  )
)

Cross-validation: Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA, followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide how many axes to retain: different numbers are tried, and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and is then used to predict the groups of the remaining 10% of observations. The current method uses the average prediction success per group (result = "groupMean") or the overall prediction success (result = "overall"). The number of PCs associated with the lowest mean squared error is then retained in the DAPC.

# Cross-validate the DAPC of snp3 against the group labels in `populations`
# to choose how many PCs to retain. The result is left unassigned so that it
# is printed in full below.
xvalDapc(
  snp3,
  populations,
  n.pca.max = 200,   # upper bound on the number of PCs tried
  n.pca = NULL,
  n.da = NULL,
  n.rep = 40,        # replicates per number of PCs
  training.set = 0.9,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  xval.plot = TRUE
)

## $`Cross-Validation Results`
##     n.pca   success
## 1      20 0.7121733
## 2      20 0.7586347
## 3      20 0.6918624
## 4      20 0.7410415
## 5      20 0.7175208
## 6      20 0.7450930
## 7      20 0.7247377
## 8      20 0.7209678
## 9      20 0.7653311
## 10     20 0.7004817
## 11     20 0.6883289
## 12     20 0.7660484
## 13     20 0.7263472
## 14     20 0.6799508
## 15     20 0.7295107
## 16     20 0.6956206
## 17     20 0.7390754
## 18     20 0.7329402
## 19     20 0.7433662
## 20     20 0.7288815
## 21     20 0.7191869
## 22     20 0.7221908
## 23     20 0.7323017
## 24     20 0.6526194
## 25     20 0.7740501
## 26     20 0.6995899
## 27     20 0.7732676
## 28     20 0.7341519
## 29     20 0.7722779
## 30     20 0.6951946
## 31     20 0.7453346
## 32     20 0.7078346
## 33     20 0.6717171
## 34     20 0.7100867
## 35     20 0.6456569
## 36     20 0.7573269
## 37     20 0.7407391
## 38     20 0.7231782
## 39     20 0.7625194
## 40     20 0.7108108
## 41     40 0.7872334
## 42     40 0.8047725
## 43     40 0.7506963
## 44     40 0.7829471
## 45     40 0.7737122
## 46     40 0.7607249
## 47     40 0.7670157
## 48     40 0.7880247
## 49     40 0.7763592
## 50     40 0.7683322
## 51     40 0.7126060
## 52     40 0.7412828
## 53     40 0.7988809
## 54     40 0.7512279
## 55     40 0.7894947
## 56     40 0.7298717
## 57     40 0.7756443
## 58     40 0.7635027
## 59     40 0.7412984
## 60     40 0.7625959
## 61     40 0.7571691
## 62     40 0.7736241
## 63     40 0.7422269
## 64     40 0.7367106
## 65     40 0.6928590
## 66     40 0.7687290
## 67     40 0.7787173
## 68     40 0.7284152
## 69     40 0.7464795
## 70     40 0.7623122
## 71     40 0.7983358
## 72     40 0.7745735
## 73     40 0.7524213
## 74     40 0.7544142
## 75     40 0.7568142
## 76     40 0.7862862
## 77     40 0.7027281
## 78     40 0.7609155
## 79     40 0.7411608
## 80     40 0.7065247
## 81     60 0.7117034
## 82     60 0.8206816
## 83     60 0.7835784
## 84     60 0.6940216
## 85     60 0.7856909
## 86     60 0.7854925
## 87     60 0.7762663
## 88     60 0.7500422
## 89     60 0.7215764
## 90     60 0.7938463
## 91     60 0.7446195
## 92     60 0.8283497
## 93     60 0.8073296
## 94     60 0.7742530
## 95     60 0.6899884
## 96     60 0.8169860
## 97     60 0.8125671
## 98     60 0.7964840
## 99     60 0.7750554
## 100    60 0.7800412
## 101    60 0.7627904
## 102    60 0.8183007
## 103    60 0.7678630
## 104    60 0.7331934
## 105    60 0.7968416
## 106    60 0.7832020
## 107    60 0.7113562
## 108    60 0.7777544
## 109    60 0.7966737
## 110    60 0.7598681
## 111    60 0.7681694
## 112    60 0.7930614
## 113    60 0.7706816
## 114    60 0.7649849
## 115    60 0.7740342
## 116    60 0.7145804
## 117    60 0.8203975
## 118    60 0.7679243
## 119    60 0.7829739
## 120    60 0.7852241
## 121    80 0.7716737
## 122    80 0.7270440
## 123    80 0.7882207
## 124    80 0.7269758
## 125    80 0.7275444
## 126    80 0.7418149
## 127    80 0.7753239
## 128    80 0.7122958
## 129    80 0.7425335
## 130    80 0.7573267
## 131    80 0.7649860
## 132    80 0.7173098
## 133    80 0.8571078
## 134    80 0.7978350
## 135    80 0.7109185
## 136    80 0.7289714
## 137    80 0.7410685
## 138    80 0.7631507
## 139    80 0.7340155
## 140    80 0.7674213
## 141    80 0.7318715
## 142    80 0.6978466
## 143    80 0.7661969
## 144    80 0.8400444
## 145    80 0.7481437
## 146    80 0.7455644
## 147    80 0.7636220
## 148    80 0.8302638
## 149    80 0.7188299
## 150    80 0.7942927
## 151    80 0.7202213
## 152    80 0.7635958
## 153    80 0.7534097
## 154    80 0.7398664
## 155    80 0.7243697
## 156    80 0.7272100
## 157    80 0.8030229
## 158    80 0.7645396
## 159    80 0.7650083
## 160    80 0.7839140
## 161   100 0.7428077
## 162   100 0.7369927
## 163   100 0.7509891
## 164   100 0.6631589
## 165   100 0.7152859
## 166   100 0.7313603
## 167   100 0.7140929
## 168   100 0.7083683
## 169   100 0.7190892
## 170   100 0.7956437
## 171   100 0.7561975
## 172   100 0.7708699
## 173   100 0.7579423
## 174   100 0.7074872
## 175   100 0.7812829
## 176   100 0.6964257
## 177   100 0.7625257
## 178   100 0.7078023
## 179   100 0.7369384
## 180   100 0.7309198
## 181   100 0.7822798
## 182   100 0.7727328
## 183   100 0.7325251
## 184   100 0.6587404
## 185   100 0.7311424
## 186   100 0.7127013
## 187   100 0.7770396
## 188   100 0.7460872
## 189   100 0.7988110
## 190   100 0.7309351
## 191   100 0.7485477
## 192   100 0.7217729
## 193   100 0.7802229
## 194   100 0.6795781
## 195   100 0.7243270
## 196   100 0.7139239
## 197   100 0.7027385
## 198   100 0.7604313
## 199   100 0.6487817
## 200   100 0.7528653
## 201   120 0.6535000
## 202   120 0.7735360
## 203   120 0.7083683
## 204   120 0.7543910
## 205   120 0.7547656
## 206   120 0.7105639
## 207   120 0.6470180
## 208   120 0.7002172
## 209   120 0.7606140
## 210   120 0.6569351
## 211   120 0.7610833
## 212   120 0.8014082
## 213   120 0.7396046
## 214   120 0.7459742
## 215   120 0.7159671
## 216   120 0.6845434
## 217   120 0.6523006
## 218   120 0.7464957
## 219   120 0.7606612
## 220   120 0.7586363
## 221   120 0.8278916
## 222   120 0.7433367
## 223   120 0.7065314
## 224   120 0.6771465
## 225   120 0.7456203
## 226   120 0.8095377
## 227   120 0.7199033
## 228   120 0.6962063
## 229   120 0.7338055
## 230   120 0.7225392
## 231   120 0.7245345
## 232   120 0.7657571
## 233   120 0.7756660
## 234   120 0.7656546
## 235   120 0.7025064
## 236   120 0.7486481
## 237   120 0.7329994
## 238   120 0.6776547
## 239   120 0.7169407
## 240   120 0.7792343
## 241   140 0.5953957
## 242   140 0.6932313
## 243   140 0.5946500
## 244   140 0.5992452
## 245   140 0.6186539
## 246   140 0.6145183
## 247   140 0.6839249
## 248   140 0.6430357
## 249   140 0.6266671
## 250   140 0.6692819
## 251   140 0.6345279
## 252   140 0.6683775
## 253   140 0.6403557
## 254   140 0.6472984
## 255   140 0.7664450
## 256   140 0.6961100
## 257   140 0.6697948
## 258   140 0.6627671
## 259   140 0.6674179
## 260   140 0.6294109
## 261   140 0.6401575
## 262   140 0.6056796
## 263   140 0.7561352
## 264   140 0.5294660
## 265   140 0.6332691
## 266   140 0.6751694
## 267   140 0.6863031
## 268   140 0.6639830
## 269   140 0.5989413
## 270   140 0.6473471
## 271   140 0.6995742
## 272   140 0.6961458
## 273   140 0.6987286
## 274   140 0.5928510
## 275   140 0.6832247
## 276   140 0.5680849
## 277   140 0.6930560
## 278   140 0.6518435
## 279   140 0.6510469
## 280   140 0.6939725
## 281   160 0.5259826
## 282   160 0.5273388
## 283   160 0.4974642
## 284   160 0.5130137
## 285   160 0.6147667
## 286   160 0.5140835
## 287   160 0.5214828
## 288   160 0.5593421
## 289   160 0.5502144
## 290   160 0.5089491
## 291   160 0.5217682
## 292   160 0.5915813
## 293   160 0.5858521
## 294   160 0.5535258
## 295   160 0.4531839
## 296   160 0.5319293
## 297   160 0.5298387
## 298   160 0.5638113
## 299   160 0.4681568
## 300   160 0.5030138
## 301   160 0.6223696
## 302   160 0.5675059
## 303   160 0.5906784
## 304   160 0.5598905
## 305   160 0.5190702
## 306   160 0.5134123
## 307   160 0.5304884
## 308   160 0.5295380
## 309   160 0.5107148
## 310   160 0.5034419
## 311   160 0.5927687
## 312   160 0.5803295
## 313   160 0.5392707
## 314   160 0.5082775
## 315   160 0.5629371
## 316   160 0.4415218
## 317   160 0.5358065
## 318   160 0.4849450
## 319   160 0.5568267
## 320   160 0.5379564
## 
## $`Median and Confidence Interval for Random Chance`
##       2.5%        50%      97.5% 
## 0.02446939 0.04021864 0.06202084 
## 
## $`Mean Successful Assignment by Number of PCs of PCA`
##        20        40        60        80       100       120       140       160 
## 0.7239487 0.7586910 0.7724612 0.7558843 0.7340591 0.7314674 0.6521522 0.5355762 
## 
## $`Number of PCs Achieving Highest Mean Success`
## [1] "60"
## 
## $`Root Mean Squared Error by Number of PCs of PCA`
##        20        40        60        80       100       120       140       160 
## 0.2778009 0.2427225 0.2301483 0.2466599 0.2683058 0.2718915 0.3509615 0.4661233 
## 
## $`Number of PCs Achieving Lowest MSE`
## [1] "60"
## 
## $DAPC
##  #################################################
##  # Discriminant Analysis of Principal Components #
##  #################################################
## class: dapc
## $call: dapc.data.frame(x = as.data.frame(x), grp = ..1, n.pca = ..2, 
##     n.da = ..3)
## 
## $n.pca: 60 first PCs of PCA used
## $n.da: 23 discriminant functions saved
## $var (proportion of conserved variance): 0.408
## 
## $eig (eigenvalues): 2269 1325 823.1 592.4 450 ...
## 
##   vector    length content                   
## 1 $eig      23     eigenvalues               
## 2 $grp      353    prior group assignment    
## 3 $prior    24     prior group probabilities 
## 4 $assign   353    posterior group assignment
## 5 $pca.cent 45280  centring vector of PCA    
## 6 $pca.norm 45280  scaling vector of PCA     
## 7 $pca.eig  352    eigenvalues of PCA        
## 
##   data.frame    nrow  ncol content                                          
## 1 $tab          353   60   retained PCs of PCA                              
## 2 $means        24    60   group means                                      
## 3 $loadings     60    23   loadings of variables                            
## 4 $ind.coord    353   23   coordinates of individuals (principal components)
## 5 $grp.coord    24    23   coordinates of groups                            
## 6 $posterior    353   24   posterior membership probabilities               
## 7 $pca.loadings 45280 60   PCA loadings of original variables               
## 8 $var.contr    45280 23   contribution of original variables

$n.pca: 60 first PCs of PCA used $n.da: 35 discriminant functions saved $var (proportion of conserved variance): 0.408

Run DAPC using these numbers from the cross-validation

# Run the DAPC on snp3, using the population labels in snp$pop as groups.
# n.pca and n.da are passed explicitly so the call does not stop at the
# interactive prompts (which cannot be answered when the document is knitted
# or run as a batch script). The values are the ones recorded from the
# interactive run:
#   60 PCs retained
#   39 discriminant functions retained
dapc1 <- dapc(snp3, snp$pop, n.pca = 60, n.da = 39)

Save the DAPC object

# Persist the fitted DAPC so it can be reloaded without refitting.
saveRDS(
  object = dapc1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc1.rds"
  )
)

Load the DAPC object

# Reload the DAPC fit written by the save step.
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# a-score optimisation of the number of PCs to retain for dapc1.
# seq_len() replaces 1:ncol(...) so the candidate range can never run
# backwards. The a-score is estimated from n.sim randomised group
# assignments, so `$best` can differ between runs unless a seed is set
# beforehand -- NOTE(review): consider set.seed() for a reproducible optimum.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 20
)

## $pop.score
## $pop.score$`1`
##         ALD         ALV         BEN         BER         CAM         CHA 
##  0.49000000  0.24583333  0.88750000  0.65000000  0.37500000  0.25833333 
##         CRO         GEL         GRA         GRC         HAI         HAN 
##  0.46250000  0.00000000  0.78181818 -0.01000000  0.50416667  1.00000000 
##         HOC         HUN         INJ         INW         JAF         KAC 
##  0.28571429  0.71250000  0.86818182  1.00000000  0.00000000  0.00000000 
##         KAG         KAN         KAT         KLP         KUN         LAM 
##  0.57083333  0.52272727  0.00000000  0.00000000  0.00000000  0.00000000 
##         MAT         OKI         PAL         QNC         SON         SSK 
##  0.49583333  0.62916667  0.54545455  0.07272727  0.00000000  0.51250000 
##         SUF         SUU         TAI         TIR         UTS         YUN 
##  0.00000000  1.00000000  0.00000000  0.00000000  0.62083333  0.00000000 
## 
## $pop.score$`5`
##         ALD         ALV         BEN         BER         CAM         CHA 
##  0.65000000  0.46666667  0.88333333  0.57500000  0.54583333  0.30000000 
##         CRO         GEL         GRA         GRC         HAI         HAN 
##  0.57916667 -0.07500000  0.94090909  0.84500000  0.39166667  0.90000000 
##         HOC         HUN         INJ         INW         JAF         KAC 
##  0.23571429  0.91666667  0.94545455  0.96250000 -0.12500000 -0.04166667 
##         KAG         KAN         KAT         KLP         KUN         LAM 
##  0.75000000  0.70454545 -0.05833333 -0.07500000  0.96250000 -0.02777778 
##         MAT         OKI         PAL         QNC         SON         SSK 
##  0.70833333  0.79583333  0.89545455  0.95909091 -0.05000000  0.67500000 
##         SUF         SUU         TAI         TIR         UTS         YUN 
##  0.30833333  0.90833333  0.91428571 -0.05000000  0.96250000 -0.02222222 
## 
## $pop.score$`10`
##        ALD        ALV        BEN        BER        CAM        CHA        CRO 
##  0.7350000  0.4625000  0.8583333  0.9208333  0.6625000  0.6333333  0.7291667 
##        GEL        GRA        GRC        HAI        HAN        HOC        HUN 
##  0.7250000  0.8454545  0.8400000  0.5416667  0.8250000  0.6071429  0.8291667 
##        INJ        INW        JAF        KAC        KAG        KAN        KAT 
##  0.9272727  0.7625000 -0.1750000 -0.0750000  0.8916667  0.7318182 -0.1083333 
##        KLP        KUN        LAM        MAT        OKI        PAL        QNC 
##  0.9125000  0.8500000  0.3722222  0.7291667  0.9166667  0.9272727  0.7954545 
##        SON        SSK        SUF        SUU        TAI        TIR        UTS 
## -0.2000000  0.6500000  0.2833333  0.9166667  0.9000000  0.4125000  0.9208333 
##        YUN 
## -0.1111111 
## 
## $pop.score$`15`
##         ALD         ALV         BEN         BER         CAM         CHA 
##  0.65000000  0.42916667  0.83750000  0.92916667  0.63333333  0.52916667 
##         CRO         GEL         GRA         GRC         HAI         HAN 
##  0.78750000  0.60000000  0.89545455  0.82500000  0.52083333  0.81250000 
##         HOC         HUN         INJ         INW         JAF         KAC 
##  0.83571429  0.90000000  0.90454545  0.75000000 -0.32500000 -0.08333333 
##         KAG         KAN         KAT         KLP         KUN         LAM 
##  0.89166667  0.90909091  0.55000000  0.85000000  0.82500000  0.55555556 
##         MAT         OKI         PAL         QNC         SON         SSK 
##  0.73333333  0.92500000  0.95000000  0.90000000  0.20000000  0.44583333 
##         SUF         SUU         TAI         TIR         UTS         YUN 
##  0.71666667  0.77500000  0.88571429  0.31250000  0.90833333  0.09444444 
## 
## $pop.score$`20`
##         ALD         ALV         BEN         BER         CAM         CHA 
##  0.67000000  0.48333333  0.84583333  0.89166667  0.64166667  0.58333333 
##         CRO         GEL         GRA         GRC         HAI         HAN 
##  0.81250000  0.47500000  0.88181818  0.74000000  0.73333333  0.63750000 
##         HOC         HUN         INJ         INW         JAF         KAC 
##  0.85000000  0.86250000  0.83636364  0.77500000 -0.42500000  0.13333333 
##         KAG         KAN         KAT         KLP         KUN         LAM 
##  0.87500000  0.85000000  0.48333333  0.76250000  0.76250000  0.72222222 
##         MAT         OKI         PAL         QNC         SON         SSK 
##  0.73333333  0.87500000  0.85454545  0.86363636  0.40000000  0.45416667 
##         SUF         SUU         TAI         TIR         UTS         YUN 
##  0.62500000  0.84166667  0.87857143  0.30000000  0.85833333  0.08888889 
## 
## $pop.score$`25`
##         ALD         ALV         BEN         BER         CAM         CHA 
##  0.73000000  0.58750000  0.82083333  0.85416667  0.59166667  0.67500000 
##         CRO         GEL         GRA         GRC         HAI         HAN 
##  0.77083333  0.40000000  0.80454545  0.75500000  0.65833333  0.75000000 
##         HOC         HUN         INJ         INW         JAF         KAC 
##  0.80714286  0.81250000  0.85000000  0.73750000 -0.05000000  0.40000000 
##         KAG         KAN         KAT         KLP         KUN         LAM 
##  0.87083333  0.77727273  0.47500000  0.75000000  0.65000000  0.80555556 
##         MAT         OKI         PAL         QNC         SON         SSK 
##  0.77500000  0.85000000  0.80454545  0.85454545  0.18333333  0.43750000 
##         SUF         SUU         TAI         TIR         UTS         YUN 
##  0.60833333  0.78333333  0.75714286  0.47500000  0.85416667  0.07777778 
## 
## $pop.score$`30`
##       ALD       ALV       BEN       BER       CAM       CHA       CRO       GEL 
## 0.6800000 0.6625000 0.8041667 0.7666667 0.5666667 0.6500000 0.8708333 0.3750000 
##       GRA       GRC       HAI       HAN       HOC       HUN       INJ       INW 
## 0.8272727 0.6750000 0.7875000 0.6125000 0.7571429 0.8333333 0.8363636 0.6750000 
##       JAF       KAC       KAG       KAN       KAT       KLP       KUN       LAM 
## 0.4250000 0.2416667 0.8041667 0.7909091 0.4250000 0.6500000 0.5500000 0.7944444 
##       MAT       OKI       PAL       QNC       SON       SSK       SUF       SUU 
## 0.6041667 0.8583333 0.8090909 0.8590909 0.2333333 0.2416667 0.5333333 0.6750000 
##       TAI       TIR       UTS       YUN 
## 0.7642857 0.6125000 0.8125000 0.5722222 
## 
## $pop.score$`35`
##       ALD       ALV       BEN       BER       CAM       CHA       CRO       GEL 
## 0.6600000 0.5291667 0.6541667 0.7958333 0.5083333 0.7083333 0.8166667 0.3250000 
##       GRA       GRC       HAI       HAN       HOC       HUN       INJ       INW 
## 0.7409091 0.7200000 0.7375000 0.5625000 0.7214286 0.8458333 0.8136364 0.5875000 
##       JAF       KAC       KAG       KAN       KAT       KLP       KUN       LAM 
## 0.4250000 0.3666667 0.7833333 0.8000000 0.3333333 0.6375000 0.5625000 0.7722222 
##       MAT       OKI       PAL       QNC       SON       SSK       SUF       SUU 
## 0.6666667 0.8250000 0.7590909 0.7727273 0.1666667 0.2458333 0.4416667 0.6416667 
##       TAI       TIR       UTS       YUN 
## 0.7142857 0.5500000 0.7500000 0.5944444 
## 
## $pop.score$`40`
##       ALD       ALV       BEN       BER       CAM       CHA       CRO       GEL 
## 0.7200000 0.5375000 0.5875000 0.7958333 0.5208333 0.8041667 0.7291667 0.3250000 
##       GRA       GRC       HAI       HAN       HOC       HUN       INJ       INW 
## 0.7590909 0.6650000 0.6958333 0.5250000 0.6928571 0.7250000 0.7454545 0.5500000 
##       JAF       KAC       KAG       KAN       KAT       KLP       KUN       LAM 
## 0.4500000 0.4000000 0.7791667 0.7681818 0.3000000 0.5500000 0.5375000 0.7166667 
##       MAT       OKI       PAL       QNC       SON       SSK       SUF       SUU 
## 0.5500000 0.8000000 0.8090909 0.7136364 0.1500000 0.0500000 0.4000000 0.6416667 
##       TAI       TIR       UTS       YUN 
## 0.6285714 0.5125000 0.7791667 0.4944444 
## 
## $pop.score$`45`
##       ALD       ALV       BEN       BER       CAM       CHA       CRO       GEL 
## 0.6150000 0.5416667 0.6166667 0.8208333 0.5000000 0.7541667 0.7458333 0.2500000 
##       GRA       GRC       HAI       HAN       HOC       HUN       INJ       INW 
## 0.6727273 0.5950000 0.7625000 0.5125000 0.6214286 0.7625000 0.7454545 0.4875000 
##       JAF       KAC       KAG       KAN       KAT       KLP       KUN       LAM 
## 0.3250000 0.4000000 0.7250000 0.7181818 0.3000000 0.5125000 0.4125000 0.7333333 
##       MAT       OKI       PAL       QNC       SON       SSK       SUF       SUU 
## 0.5125000 0.7666667 0.7363636 0.7590909 0.1000000 0.1000000 0.4500000 0.6000000 
##       TAI       TIR       UTS       YUN 
## 0.6285714 0.5375000 0.7458333 0.4722222 
## 
## $pop.score$`50`
##        ALD        ALV        BEN        BER        CAM        CHA        CRO 
## 0.54500000 0.57083333 0.65833333 0.75000000 0.48333333 0.70416667 0.65416667 
##        GEL        GRA        GRC        HAI        HAN        HOC        HUN 
## 0.27500000 0.67727273 0.54500000 0.77083333 0.48750000 0.60000000 0.73333333 
##        INJ        INW        JAF        KAC        KAG        KAN        KAT 
## 0.74090909 0.45000000 0.30000000 0.19166667 0.74166667 0.71818182 0.25000000 
##        KLP        KUN        LAM        MAT        OKI        PAL        QNC 
## 0.36250000 0.48750000 0.61111111 0.52916667 0.73333333 0.70000000 0.76363636 
##        SON        SSK        SUF        SUU        TAI        TIR        UTS 
## 0.36666667 0.02916667 0.35833333 0.58333333 0.62857143 0.38750000 0.69583333 
##        YUN 
## 0.43333333 
## 
## $pop.score$`55`
##         ALD         ALV         BEN         BER         CAM         CHA 
## 0.545000000 0.520833333 0.587500000 0.762500000 0.437500000 0.679166667 
##         CRO         GEL         GRA         GRC         HAI         HAN 
## 0.625000000 0.325000000 0.686363636 0.540000000 0.687500000 0.337500000 
##         HOC         HUN         INJ         INW         JAF         KAC 
## 0.528571429 0.733333333 0.663636364 0.375000000 0.200000000 0.350000000 
##         KAG         KAN         KAT         KLP         KUN         LAM 
## 0.725000000 0.663636364 0.158333333 0.387500000 0.387500000 0.477777778 
##         MAT         OKI         PAL         QNC         SON         SSK 
## 0.633333333 0.750000000 0.659090909 0.654545455 0.466666667 0.004166667 
##         SUF         SUU         TAI         TIR         UTS         YUN 
## 0.400000000 0.491666667 0.550000000 0.437500000 0.670833333 0.605555556 
## 
## $pop.score$`60`
##       ALD       ALV       BEN       BER       CAM       CHA       CRO       GEL 
## 0.5900000 0.4791667 0.5708333 0.6583333 0.4208333 0.6083333 0.6166667 0.2250000 
##       GRA       GRC       HAI       HAN       HOC       HUN       INJ       INW 
## 0.6590909 0.6350000 0.6208333 0.3750000 0.5428571 0.6458333 0.6272727 0.3750000 
##       JAF       KAC       KAG       KAN       KAT       KLP       KUN       LAM 
## 0.2750000 0.3583333 0.6916667 0.6409091 0.0750000 0.3750000 0.4250000 0.5333333 
##       MAT       OKI       PAL       QNC       SON       SSK       SUF       SUU 
## 0.5666667 0.6625000 0.6000000 0.6045455 0.3000000 0.1416667 0.3000000 0.4666667 
##       TAI       TIR       UTS       YUN 
## 0.5571429 0.3625000 0.6541667 0.4666667 
## 
## 
## $mean
##         1         5        10        15        20        25        30        35 
## 0.3744895 0.5321423 0.6234869 0.6627691 0.6571355 0.6595656 0.6565738 0.6259836 
##        40        45        50        55        60 
## 0.5946897 0.5705289 0.5421440 0.5196531 0.4918561 
## 
## $pred
## $pred$x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
## 
## $pred$y
##  [1] 0.3744895 0.4181755 0.4601581 0.4987195 0.5321423 0.5592002 0.5806327
##  [8] 0.5976708 0.6115452 0.6234869 0.6344505 0.6442864 0.6525687 0.6588716
## [15] 0.6627691 0.6640422 0.6632999 0.6613575 0.6590309 0.6571355 0.6563023
## [22] 0.6564223 0.6572020 0.6583476 0.6595656 0.6605631 0.6610510 0.6607409
## [29] 0.6593446 0.6565738 0.6522519 0.6466496 0.6401492 0.6331331 0.6259836
## [36] 0.6190277 0.6123706 0.6060621 0.6001518 0.5946897 0.5896829 0.5849687
## [43] 0.5803418 0.5755970 0.5705289 0.5650042 0.5591772 0.5532741 0.5475209
## [50] 0.5421440 0.5372960 0.5328367 0.5285523 0.5242290 0.5196531 0.5146617
## [57] 0.5092951 0.5036445 0.4978011 0.4918561
## 
## 
## $best
## [1] 16
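#Optimal number = 27 (NOTE: the run printed above reports $best = 16; the a-score optimisation is simulation-based, so confirm which value is intended)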

Run DAPC with object

# Refit the DAPC with a fixed number of PCs and discriminant functions,
# grouping individuals by the `populations` factor.
dapc_snp <- dapc(
  snp3,
  grp = populations,
  n.pca = 27,
  n.da = 5
)

Save it

# Store the refitted DAPC next to the other outputs of this analysis.
saveRDS(
  object = dapc_snp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_snp.rds"
  )
)

To load it

# Reload the refitted DAPC.
dapc_snp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_snp.rds"
  )
)

# Colour palette (39 entries) used for the per-population scatter plots.
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0",
  "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4",
  "#cf749b", "#FF7F00", "#2524f9", "#799d10", "#a7e831", "#984EA3",
  "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624",
  "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5",
  "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3",
  "magenta", "gold4", "purple"
)

Plot using different discriminant functions

1 & 2

# Quick-look scatter: discriminant function 1 on x, 2 on y.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  col = myCol, pch = 20,
  cex = 1, cex.lab = 0.1,
  bg = "white", scree.da = TRUE
)

# Write the DF1-vs-DF2 scatter (coloured with myCol) to a 7 x 7 inch PDF.
# The path is built with here(), like the other outputs in this file, so it
# does not depend on the current working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_euro_global_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)

# Plotting symbols handed to scatter(): pch 1-25, then 35-38, then 1-25 again.
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
# cleg (not cex.leg) is scatter.dapc()'s legend-size argument; cex.leg was
# forwarded to the low-level plotting calls and only triggered a
# '"cex.leg" is not a graphical parameter' warning.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)

dev.off()
# On-screen version of the DF1-vs-DF2 scatter (coloured with myCol).
# Plotting symbols handed to scatter(): pch 1-25, then 35-38, then 1-25 again.
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
# cleg (not cex.leg) is scatter.dapc()'s legend-size argument; cex.leg was
# forwarded to the low-level plotting calls and only triggered a warning.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
# Restore the graphical parameters saved in `op` so later plots are unaffected.
par(op)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter

# Write the DF1-vs-DF2 scatter (coloured with myCol2) to a 7 x 7 inch PDF.
# The path is built with here(), like the other outputs in this file, so it
# does not depend on the current working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_euro_global_region_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)

# 36-entry palette in which several groups share a colour
# (presumably one colour per region, per the output file name -- confirm).
myCol2 <- c(
  "#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9",
  "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9",
  "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9",
  "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9",
  "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9",
  "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C"
)

# Plotting symbols handed to scatter(): pch 1-25, then 35-38, then 1-25 again.
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
# cleg (not cex.leg) is scatter.dapc()'s legend-size argument; cex.leg was
# forwarded to the low-level plotting calls and only triggered a warning.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)

dev.off()
# On-screen version of the DF1-vs-DF2 scatter coloured with myCol2.
# 36-entry palette in which several groups share a colour
# (presumably one colour per region -- confirm).
myCol2 <- c(
  "#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9",
  "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9",
  "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9",
  "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9",
  "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9",
  "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C"
)

# Plotting symbols handed to scatter(): pch 1-25, then 35-38, then 1-25 again.
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
# cleg (not cex.leg) is scatter.dapc()'s legend-size argument; cex.leg was
# forwarded to the low-level plotting calls and only triggered a warning.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
# Restore the graphical parameters saved in `op` so later plots are unaffected.
par(op)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter

# Write the DF1-vs-DF3 scatter (coloured with myCol2) to a 7 x 7 inch PDF.
# The path is built with here(), like the other outputs in this file, so it
# does not depend on the current working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_euro_global_region_PC1_3.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)

# 36-entry palette in which several groups share a colour
# (presumably one colour per region, per the output file name -- confirm).
myCol2 <- c(
  "#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9",
  "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9",
  "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9",
  "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9",
  "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9",
  "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C"
)

# Plotting symbols handed to scatter(): pch 1-25, then 35-38, then 1-25 again.
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
# cleg (not cex.leg) is scatter.dapc()'s legend-size argument; cex.leg was
# forwarded to the low-level plotting calls and only triggered a warning.
scatter(
  dapc_snp,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)

dev.off()
# On-screen version of the DF1-vs-DF3 scatter coloured with myCol2.
# 36-entry palette in which several groups share a colour
# (presumably one colour per region -- confirm).
myCol2 <- c(
  "#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9",
  "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9",
  "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9",
  "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9",
  "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9",
  "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C"
)

# Plotting symbols handed to scatter(): pch 1-25, then 35-38, then 1-25 again.
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
# cleg (not cex.leg) is scatter.dapc()'s legend-size argument; cex.leg was
# forwarded to the low-level plotting calls and only triggered a warning.
scatter(
  dapc_snp,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  legend = TRUE, posi.leg = "topleft", cleg = 1.0,
  posi.da = "bottomleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
# Restore the graphical parameters saved in `op` so later plots are unaffected.
par(op)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter

5.3 Plot grouped by country (instead of pop)

Import Sample Locations

# Sampling-site metadata for this subset of populations.
sampling_loc <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "lea",
    "sampling_loc_albania_croatia_greece_US.rds"
  )
)
head(x = sampling_loc)
##     Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ      USA 39.79081 -74.92910  Americas          BER 2018
## 2 Palm Beach      USA 26.70560 -80.03640  Americas          PAL 2018
## 3  Dubrovnik  Croatia 42.60654  18.22661    Europe          CRO 2017
## 4      Vlore  Albania 40.46600  19.48970    Europe          ALV 2020
## 5     Durres  Albania 41.29704  19.50373    Europe          ALD 2018
## 6     Tirana  Albania 41.31473  19.83172    Europe          TIR 2017
##            Region   Subregion order order2 orderold
## 1   North America                 1     NA       75
## 2   North America                 3     NA       77
## 3 Southern Europe East Europe    31     23       23
## 4 Southern Europe East Europe    32     24       24
## 5 Southern Europe East Europe    33     25       25
## 6 Southern Europe East Europe    34     26       26
# Copy the contents of the @other slot into the strata slot of snp2.
strata(snp2) <- data.frame(other(snp2))

# pop() is currently set to the sampling-population codes
head(x = pop(snp2))
## [1] OKI OKI OKI OKI OKI OKI
## 36 Levels: ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ ... YUN

Load the csv

# Per-individual lookup table with columns `pop` and `country`.
countr <- read.csv(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "DAPC_alb_cro_gre_US.csv"
  )
)
# Kept as a separate data.frame alias used by the following steps.
df <- as.data.frame(countr)

head(x = df)
##   pop country
## 1 OKI   Japan
## 2 OKI   Japan
## 3 OKI   Japan
## 4 OKI   Japan
## 5 OKI   Japan
## 6 OKI   Japan
# Relabel every individual with its country instead of its population code.
# The pop<- accessor is used rather than writing the @pop slot directly, so a
# lookup table whose length does not match the number of individuals raises
# an error instead of silently producing an invalid object.
# NOTE(review): rows of `df` are assumed to be in the same order as the
# individuals in snp2 -- confirm against the CSV.
pop(snp2) <- as.factor(df$country)
snp2$pop
##   [1] Japan     Japan     Japan     Japan     Japan     Japan     Japan    
##   [8] Japan     Japan     Japan     Japan     Japan     China     China    
##  [15] China     China     China     China     China     China     China    
##  [22] China     China     China     China     China     China     China    
##  [29] China     China     China     China     China     Nepal     Nepal    
##  [36] Nepal     Nepal     Taiwan    Taiwan    Taiwan    Taiwan    Taiwan   
##  [43] Taiwan    Taiwan    China     China     China     China     China    
##  [50] China     China     China     China     China     China     China    
##  [57] Vietnam   Vietnam   Vietnam   Vietnam   Malaysia  Malaysia  Malaysia 
##  [64] Malaysia  Vietnam   Vietnam   Vietnam   Vietnam   Vietnam   Vietnam  
##  [71] Vietnam   Vietnam   Vietnam   Vietnam   Vietnam   Vietnam   Vietnam  
##  [78] Vietnam   Vietnam   Vietnam   Vietnam   Vietnam   Thailand  Thailand 
##  [85] Thailand  Thailand  Thailand  Thailand  Thailand  Thailand  Thailand 
##  [92] Thailand  Thailand  Thailand  Thailand  Thailand  Thailand  Thailand 
##  [99] Thailand  Thailand  Thailand  Thailand  Thailand  Thailand  Japan    
## [106] Japan     Japan     Japan     Japan     Japan     Japan     Japan    
## [113] Japan     Japan     Japan     Japan     Japan     Japan     Japan    
## [120] Japan     Japan     Japan     Japan     Japan     Japan     Japan    
## [127] Japan     Albania   Albania   Albania   Albania   Cambodia  Cambodia 
## [134] Cambodia  Cambodia  Cambodia  Cambodia  Japan     Cambodia  Cambodia 
## [141] Cambodia  Cambodia  Cambodia  Cambodia  Japan     Japan     Greece   
## [148] Japan     Japan     Japan     Japan     Japan     Japan     Japan    
## [155] Japan     Japan     India     India     India     India     India    
## [162] India     India     India     India     India     India     India    
## [169] Thailand  Thailand  Thailand  Thailand  Thailand  Thailand  Thailand 
## [176] Thailand  Thailand  USA       USA       USA       USA       USA      
## [183] USA       USA       USA       USA       USA       USA       USA      
## [190] USA       USA       USA       USA       USA       USA       USA      
## [197] USA       USA       USA       USA       Bhutan    Bhutan    Nepal    
## [204] Nepal     Sri Lanka Sri Lanka Thailand  Thailand  Thailand  Thailand 
## [211] Thailand  Thailand  Thailand  Thailand  Thailand  Thailand  Thailand 
## [218] Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia
## [225] Indonesia Indonesia Indonesia Indonesia Indonesia Maldives  Maldives 
## [232] Maldives  Maldives  Croatia   Croatia   Croatia   Croatia   Croatia  
## [239] Croatia   Croatia   Croatia   Croatia   Croatia   Croatia   Croatia  
## [246] Greece    Greece    Greece    Greece    Greece    Greece    Greece   
## [253] Greece    Greece    Greece    Greece    Greece    Greece    Greece   
## [260] Greece    Greece    Greece    Greece    Greece    Greece    Albania  
## [267] Albania   Albania   Albania   Albania   Albania   Albania   Albania  
## [274] Albania   Albania   Albania   Albania   Albania   Albania   Albania  
## [281] Albania   Albania   Albania   Albania   Albania   Albania   Albania  
## [288] Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia
## [295] Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia
## [302] Indonesia Malaysia  Malaysia  Malaysia  Malaysia  Malaysia  Malaysia 
## [309] Malaysia  Malaysia  Malaysia  Malaysia  Malaysia  Malaysia 
## 17 Levels: Albania Bhutan Cambodia China Croatia Greece India ... Vietnam

Save the genind object

# Save the country-labelled genotype object.
saveRDS(
  object = snp2,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "snp_country.rds"
  )
)

Load the genind object

# Reload the country-labelled genotype object under the name snp_country.
snp_country <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "snp_country.rds"
  )
)

Scale

# Scale the genotype data with scaleGen(); NA.method = "mean" asks for missing
# values to be replaced by the mean. This overwrites snp_country with the
# scaled result, which is no longer a genind object (see class() below).
snp_country <- scaleGen(snp_country, NA.method="mean")
## Warning in .local(x, ...): Some scaling values are null.
##  Corresponding alleles are removed.
# Check what kind of object scaleGen() returned
class(snp_country)
## [1] "matrix" "array"
# Rows and columns of the scaled matrix
dim(snp_country)
## [1]   314 45278
# Peek at the top-left corner of the scaled matrix
snp_country[1:5,1:5]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001        0.9454195       -0.9454195        0.8715638       -0.8715638
## 1002        0.0000000        0.0000000       -1.6005080        1.6005080
## 1003       -0.3445529        0.3445529       -1.6005080        1.6005080
## 1004       -0.3445529        0.3445529       -1.6005080        1.6005080
## 1005       -0.3445529        0.3445529       -0.3644721        0.3644721
##      AX-583036983_C.T
## 1001       1.30258591
## 1002       0.02858339
## 1003       1.30258591
## 1004       0.02858339
## 1005       1.30258591
# Grouping factor for the DAPC: the pop slot of snp2 (the object saved above
# as a genind), which at this point holds the country labels.
populations <- pop(snp2)

5.3.1 Find clusters

# K-means clustering on the scaled genotype matrix.
# n.pca and n.clust are set to the values previously chosen at the
# interactive prompts, so the chunk runs unattended and the choices are
# recorded in code rather than only in comments.
grp <- find.clusters(
  snp_country,
  max.n.clust = 10,
  n.pca = 300,
  n.clust = 4
)
# retained 300 PCs
# number of clusters chosen: 4

Save it

# Save the clustering result.
# NOTE(review): this path differs from the scripts/RMarkdowns/output/...
# layout used by the other outputs in this file -- confirm it is intended.
saveRDS(
  object = grp,
  file = here("euro_global/output/dapc/MAF_1/grp_albania_croatia_greece_US.rds")
)

To load it

# Reload the clustering result.
grp <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/grp_albania_croatia_greece_US.rds")
)
# Cross-tabulate country labels (rows) against inferred clusters (columns).
table(pop(snp2), grp$grp)
##            
##              1  2  3  4
##   Albania    0 26  0  0
##   Bhutan     0  0  2  0
##   Cambodia   0  0 12  0
##   China      0 22 11  0
##   Croatia    0 12  0  0
##   Greece     0 21  0  0
##   India      0  0 12  0
##   Indonesia  0  0  0 27
##   Japan     35 12  0  0
##   Malaysia   0  0 16  0
##   Maldives   0  0  4  0
##   Nepal      0  0  4  2
##   Sri Lanka  0  0  2  0
##   Taiwan     0  7  0  0
##   Thailand   0  0 42  0
##   USA       23  0  0  0
##   Vietnam    0  7 15  0
# Initial DAPC on the scaled matrix using the inferred clusters as groups.
# n.pca and n.da are supplied explicitly (the values previously typed at the
# interactive prompts) so the chunk runs unattended and is reproducible.
# dapc() keeps at most (number of groups - 1) discriminant functions, so the
# requested n.da acts as an upper bound.
dapc_country_1 <- dapc(snp_country, grp = grp$grp, n.pca = 300, n.da = 5)
# 300 PCs retained
# 5 discriminant functions requested

Save the DAPC object

# Persist the cluster-based DAPC fit.
saveRDS(
  object = dapc_country_1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_country_1.rds"
  )
)

Load the DAPC object

# Reload the cluster-based DAPC fit.
dapc_country_1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_country_1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# a-score optimisation of the number of PCs to retain for dapc_country_1.
# seq_len() replaces 1:ncol(...) so the candidate range can never run
# backwards. The a-score is estimated from n.sim randomised group
# assignments, so `$best` can differ between runs unless a seed is set
# beforehand -- NOTE(review): consider set.seed() for a reproducible optimum.
optim.a.score(
  dapc_country_1,
  n.pca = seq_len(ncol(dapc_country_1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 20
)

## $pop.score
## $pop.score$`1`
##         1         2         3         4 
## 0.0000000 0.5879167 0.0000000 0.2729839 
## 
## $pop.score$`50`
##         1         2         3         4 
## 0.7073171 0.4016667 0.6724138 0.3806452 
## 
## $pop.score$`100`
##         1         2         3         4 
## 0.4426829 0.3070833 0.4413793 0.2987903 
## 
## $pop.score$`150`
##         1         2         3         4 
## 0.2292683 0.2066667 0.2224138 0.2016129 
## 
## $pop.score$`200`
##         1         2         3         4 
## 0.1621951 0.1412500 0.1534483 0.1149194 
## 
## $pop.score$`250`
##          1          2          3          4 
## 0.09024390 0.07166667 0.07931034 0.06532258 
## 
## $pop.score$`300`
##          1          2          3          4 
## 0.02073171 0.01250000 0.01206897 0.01129032 
## 
## 
## $mean
##          1         50        100        150        200        250        300 
## 0.21522513 0.54051067 0.37248397 0.21499041 0.14295319 0.07663587 0.01414775 
## 
## $pred
## $pred$x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300
## 
## $pred$y
##   [1] 0.21522513 0.22449083 0.23375055 0.24299769 0.25222565 0.26142782
##   [7] 0.27059759 0.27972837 0.28881353 0.29784648 0.30682062 0.31572932
##  [13] 0.32456599 0.33332403 0.34199682 0.35057776 0.35906024 0.36743766
##  [19] 0.37570341 0.38385088 0.39187348 0.39976458 0.40751760 0.41512591
##  [25] 0.42258291 0.42988201 0.43701659 0.44398004 0.45076576 0.45736714
##  [31] 0.46377758 0.46999048 0.47599921 0.48179719 0.48737780 0.49273443
##  [37] 0.49786049 0.50274936 0.50739444 0.51178912 0.51592679 0.51980086
##  [43] 0.52340471 0.52673173 0.52977533 0.53252889 0.53498582 0.53713949
##  [49] 0.53898331 0.54051067 0.54171741 0.54260912 0.54319384 0.54347961
##  [55] 0.54347446 0.54318644 0.54262358 0.54179393 0.54070551 0.53936637
##  [61] 0.53778454 0.53596806 0.53392498 0.53166332 0.52919113 0.52651645
##  [67] 0.52364731 0.52059175 0.51735781 0.51395352 0.51038693 0.50666607
##  [73] 0.50279899 0.49879371 0.49465828 0.49040073 0.48602911 0.48155145
##  [79] 0.47697579 0.47231016 0.46756261 0.46274117 0.45785389 0.45290879
##  [85] 0.44791392 0.44287731 0.43780701 0.43271105 0.42759747 0.42247430
##  [91] 0.41734959 0.41223138 0.40712769 0.40204658 0.39699607 0.39198421
##  [97] 0.38701903 0.38210857 0.37726087 0.37248397 0.36778442 0.36316278
## [103] 0.35861814 0.35414959 0.34975621 0.34543708 0.34119128 0.33701791
## [109] 0.33291603 0.32888474 0.32492313 0.32103026 0.31720523 0.31344712
## [115] 0.30975501 0.30612799 0.30256513 0.29906553 0.29562826 0.29225242
## [121] 0.28893707 0.28568132 0.28248423 0.27934489 0.27626239 0.27323581
## [127] 0.27026423 0.26734674 0.26448242 0.26167035 0.25890962 0.25619930
## [133] 0.25353849 0.25092627 0.24836171 0.24584391 0.24337194 0.24094490
## [139] 0.23856185 0.23622190 0.23392411 0.23166758 0.22945138 0.22727460
## [145] 0.22513632 0.22303564 0.22097162 0.21894335 0.21694992 0.21499041
## [151] 0.21306393 0.21116968 0.20930688 0.20747475 0.20567253 0.20389944
## [157] 0.20215470 0.20043754 0.19874718 0.19708284 0.19544376 0.19382916
## [163] 0.19223825 0.19067028 0.18912445 0.18760000 0.18609616 0.18461213
## [169] 0.18314716 0.18170047 0.18027127 0.17885880 0.17746229 0.17608094
## [175] 0.17471400 0.17336068 0.17202021 0.17069182 0.16937472 0.16806815
## [181] 0.16677133 0.16548349 0.16420384 0.16293162 0.16166604 0.16040634
## [187] 0.15915174 0.15790146 0.15665473 0.15541077 0.15416881 0.15292808
## [193] 0.15168779 0.15044717 0.14920545 0.14796185 0.14671560 0.14546593
## [199] 0.14421205 0.14295319 0.14168874 0.14041872 0.13914333 0.13786276
## [205] 0.13657720 0.13528682 0.13399184 0.13269243 0.13138879 0.13008110
## [211] 0.12876955 0.12745434 0.12613566 0.12481369 0.12348862 0.12216065
## [217] 0.12082996 0.11949674 0.11816119 0.11682350 0.11548384 0.11414242
## [223] 0.11279942 0.11145503 0.11010945 0.10876286 0.10741545 0.10606741
## [229] 0.10471893 0.10337021 0.10202142 0.10067277 0.09932444 0.09797662
## [235] 0.09662950 0.09528327 0.09393813 0.09259425 0.09125183 0.08991106
## [241] 0.08857213 0.08723523 0.08590055 0.08456828 0.08323861 0.08191172
## [247] 0.08058782 0.07926708 0.07794970 0.07663587 0.07532574 0.07401926
## [253] 0.07271638 0.07141701 0.07012107 0.06882850 0.06753922 0.06625315
## [259] 0.06497022 0.06369035 0.06241347 0.06113951 0.05986838 0.05860002
## [265] 0.05733435 0.05607129 0.05481077 0.05355271 0.05229705 0.05104370
## [271] 0.04979258 0.04854364 0.04729678 0.04605193 0.04480903 0.04356799
## [277] 0.04232874 0.04109121 0.03985532 0.03862099 0.03738815 0.03615673
## [283] 0.03492665 0.03369783 0.03247020 0.03124369 0.03001823 0.02879372
## [289] 0.02757011 0.02634731 0.02512526 0.02390387 0.02268307 0.02146279
## [295] 0.02024295 0.01902348 0.01780429 0.01658533 0.01536651 0.01414775
## 
## 
## $best
## [1] 54
#Optimal number = 54

Run DAPC with object

# Fit the DAPC with the cross-validated number of PCs (54) and 5 discriminant axes
dapc_snp_country <- dapc(
  snp_country,
  grp = populations,
  n.pca = 54,
  n.da = 5
)

Save it

# Persist the fitted DAPC object under the project root
saveRDS(
  object = dapc_snp_country,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_snp_country.rds"
  )
)

To load it

# Reload the fitted DAPC object saved above
dapc_snp_country <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "albania_croatia_greece_US", "dapc_snp_country.rds"
  )
)

5.3.2 Plot with new colors - by region

# Write the DAPC scatter plot (discriminant axes 1 vs 2) to a 7 x 7 inch PDF.
# The output path is anchored with here() so it resolves from the project root
# regardless of the current working directory.
pdf(
  file = here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_snp_country_albania_croatia_greece_US_PC1_2.pdf"), # path of the PDF file to create
  width = 7,  # width of the plot in inches
  height = 7  # height of the plot in inches
)

# Plotting symbols handed to scatter() via pch
good.shapes <- c(1:25, 35:38, 1:25)

# Colours handed to scatter() via col
myCol2 <- c(
  "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2",
  "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45",
  "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9"
)

# par() returns the previous settings so they can be restored after plotting
op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1,
  yax = 2,
  pch = good.shapes,
  col = myCol2,
  cstar = 0,
  cellipse = TRUE,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5
)
par(op)

# Close the PDF device so the file is flushed to disk
dev.off()
# Draw the DAPC scatter plot (discriminant axes 1 vs 2) on the current device.

# Plotting symbols handed to scatter() via pch
good.shapes <- c(1:25, 35:38, 1:25)

# Colours handed to scatter() via col
myCol2 <- c(
  "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2",
  "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45",
  "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9"
)

# Shrink text and symbols for this plot only; par() returns the old settings
op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1,
  yax = 2,
  pch = good.shapes,
  col = myCol2,
  cstar = 0,
  cellipse = TRUE,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5
)
# Restore the previous graphical parameters so later plots are unaffected
par(op)

Plot with new colors - by region

# Write the DAPC scatter plot (discriminant axes 1 vs 3) to a 7 x 7 inch PDF.
# The output path is anchored with here() so it resolves from the project root
# regardless of the current working directory.
pdf(
  file = here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_snp_country_albania_croatia_greece_US_PC1_3.pdf"), # path of the PDF file to create
  width = 7,  # width of the plot in inches
  height = 7  # height of the plot in inches
)

# Plotting symbols handed to scatter() via pch
good.shapes <- c(1:25, 35:38, 1:25)

# Colours handed to scatter() via col
myCol2 <- c(
  "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2",
  "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45",
  "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9"
)

# par() returns the previous settings so they can be restored after plotting
op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1,
  yax = 3,
  pch = good.shapes,
  col = myCol2,
  cstar = 0,
  cellipse = TRUE,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5
)
par(op)

# Close the PDF device so the file is flushed to disk
dev.off()
# Draw the DAPC scatter plot (discriminant axes 1 vs 3) on the current device.

# Plotting symbols handed to scatter() via pch
good.shapes <- c(1:25, 35:38, 1:25)

# Colours handed to scatter() via col
myCol2 <- c(
  "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2",
  "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45",
  "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9"
)

# Shrink text and symbols for this plot only; par() returns the old settings
op <- par(cex = 0.39)
scatter(
  dapc_snp_country,
  xax = 1,
  yax = 3,
  pch = good.shapes,
  col = myCol2,
  cstar = 0,
  cellipse = TRUE,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.leg = "topleft",
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5
)
# Restore the previous graphical parameters so later plots are unaffected
par(op)

6. DAPC for native_far_east_euro.txt

Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)

Create files

# Subset the r2_0.01_b SNP set to the families listed in
# native_far_east_euro.txt and write a new binary fileset for the DAPC.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_far_east_euro.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_native_far_east_euro \
--silent;
# PLINK runs with --silent, so pull the sample/variant counts from its log
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_native_far_east_euro.log

22642 variants loaded from .bim file. --keep-fam: 338 people remaining. Total genotyping rate in remaining samples is 0.970255. 22642 variants and 338 people pass filters and QC.

Convert to raw format

# Recode the subset fileset with --recodeA to produce the .raw file that is
# read into R below. The same --out prefix is reused, so the .log file from
# the previous PLINK run is overwritten.
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_native_far_east_euro \
--recodeA \
--out output/dapc/MAF_1/dapc_native_far_east_euro \
--silent;
# PLINK runs with --silent, so pull the sample/variant counts from its log
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_native_far_east_euro.log

22642 variants loaded from .bim file. 22642 variants and 338 people pass filters and QC.

Clean env & memory

# Remove all objects from the environment so nothing from the previous
# section is carried over. ls() is called with its defaults, so names that
# start with a dot are not listed and therefore not removed; attached
# packages are not affected.
rm(list = ls())

# Run the garbage collector to free up memory
gc()
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  5970497 318.9   16401712  876.0  24799007 1324.5
## Vcells 10172350  77.7  434414951 3314.4 848466578 6473.3

Import the data and convert it to genind format

# Read the PLINK .raw file into a genlight object, using 4 cores
snp <- read.PLINK(
  file = here("euro_global/output/dapc/MAF_1/dapc_native_far_east_euro.raw"),
  n.cores = 4,
  parallel = require("parallel"),
  chunkSize = 1000,
  quiet = FALSE
)
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
nInd(snp)
## [1] 338
nLoc(snp)
## [1] 22642
nPop(snp)
## [1] 37
indNames(snp)
##   [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
##  [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
##  [21] "1061" "1062" "1063" "1064" "1065" "1066" "1067" "1068" "1069" "1070"
##  [31] "1071" "1072" "1073" "1074" "1075" "1076" "1077" "1078" "1079" "1080"
##  [41] "1081" "1082" "1083" "1084" "1085" "1086" "1087" "1088" "1089" "1090"
##  [51] "1091" "1092" "1093" "1094" "1095" "1101" "1102" "1103" "1105" "1106"
##  [61] "1107" "1109" "1110" "1111" "1112" "1113" "1114" "1115" "1116" "1117"
##  [71] "1118" "1119" "1120" "1121" "1122" "1123" "1124" "1125" "1126" "1127"
##  [81] "1128" "1129" "1130" "1131" "1132" "1133" "1134" "1135" "1136" "1137"
##  [91] "1138" "1139" "1140" "1141" "1142" "1143" "1144" "1145" "1146" "1147"
## [101] "1148" "1149" "1150" "1151" "1152" "1153" "1154" "1155" "1156" "1157"
## [111] "1158" "1159" "1160" "1161" "1162" "1163" "1165" "1166" "1167" "1168"
## [121] "1169" "1170" "1171" "1172" "1173" "1174" "1175" "1176" "1177" "1178"
## [131] "1179" "1180" "1181" "1182" "1183" "1184" "1202" "1203" "1204" "1205"
## [141] "1206" "1207" "1208" "1209" "1210" "1211" "1212" "1213" "1214" "1215"
## [151] "1216" "1217" "1238" "1239" "1240" "1241" "1242" "1243" "1244" "1245"
## [161] "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254" "1255" "1256"
## [171] "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264" "1265" "1266"
## [181] "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276" "1282" "1283"
## [191] "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331" "1332" "1333"
## [201] "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377" "1378" "1379"
## [211] "1380" "1381" "1382" "1383" "1384" "159"  "162"  "167"  "169"  "170" 
## [221] "171"  "172"  "173"  "2174" "2175" "2176" "2177" "2178" "2179" "217" 
## [231] "2180" "2181" "2182" "2183" "2184" "2185" "218"  "219"  "220"  "221" 
## [241] "222"  "223"  "224"  "225"  "226"  "227"  "230"  "255"  "256"  "257" 
## [251] "258"  "261"  "262"  "263"  "264"  "265"  "266"  "267"  "268"  "269" 
## [261] "270"  "271"  "272"  "273"  "275"  "276"  "277"  "278"  "602"  "603" 
## [271] "604"  "607"  "609"  "610"  "623"  "624"  "625"  "626"  "627"  "628" 
## [281] "629"  "630"  "631"  "632"  "633"  "666"  "669"  "670"  "671"  "672" 
## [291] "673"  "674"  "675"  "676"  "677"  "678"  "679"  "680"  "681"  "682" 
## [301] "683"  "901"  "902"  "903"  "904"  "905"  "906"  "907"  "908"  "909" 
## [311] "910"  "964"  "965"  "966"  "967"  "972"  "973"  "975"  "976"  "977" 
## [321] "978"  "979"  "980"  "981"  "982"  "983"  "984"  "985"  "986"  "987" 
## [331] "988"  "989"  "990"  "991"  "992"  "993"  "994"  "995"
# Convert the genlight object to genind with dartR's gl2gi();
# probar = TRUE prints the progress bar shown below.
snp2 <- gl2gi(snp, probar = TRUE, verbose = NULL)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save it

# Cache the genind object so the slow gl2gi() conversion need not be rerun
saveRDS(
  object = snp2,
  file = here("euro_global/output/dapc/MAF_1/snp2_far_east_euro.rds")
)

To load it

# Reload the cached genind object
snp2 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp2_far_east_euro.rds")
)

6.1 Scale

# Scale the genind allele table; the result is a plain numeric matrix.
# NA.method = "mean" presumably fills missing genotypes with the allele
# mean (see ?scaleGen) - confirm before relying on it.
snp3 <- scaleGen(snp2, NA.method="mean")
## Warning in .local(x, ...): Some scaling values are null.
##  Corresponding alleles are removed.
class(snp3)
## [1] "matrix" "array"

Save it

# Cache the scaled allele matrix
saveRDS(
  object = snp3,
  file = here("euro_global/output/dapc/MAF_1/snp3_far_east_euro.rds")
)

To load it

# Reload the cached scaled allele matrix
snp3 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp3_far_east_euro.rds")
)
dim(snp3)
## [1]   338 45284
snp3[1:5,1:5]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001        1.0174555       -1.0174555        0.8124603       -0.8124603
## 1002        0.0000000        0.0000000       -1.8770635        1.8770635
## 1003       -0.2486263        0.2486263       -1.8770635        1.8770635
## 1004       -0.2486263        0.2486263       -1.8770635        1.8770635
## 1005       -0.2486263        0.2486263       -0.5323016        0.5323016
##      AX-583036983_C.T
## 1001        1.4460470
## 1002        0.1495911
## 1003        1.4460470
## 1004        0.1495911
## 1005        1.4460470
# Population label of every individual, taken from the genlight object
populations <- pop(snp)

6.2 Find clusters

# Interactive call: find.clusters() prompts for the number of PCs to retain
# and then for the number of clusters. The answers given in this run are
# recorded in the two comments below.
grp <- find.clusters(snp3, max.n.clust=20)
#retained 350
#Choose the number of clusters (>=2): 6

Save it

# Cache the find.clusters() result (it was produced interactively).
saveRDS(
  object = grp,
  file = here(
    "euro_global", "output", "dapc", "MAF_1", "grp_far_east_euro.rds"
  )
)

To load it

# Restore the cached find.clusters() result.
grp <- readRDS(
  file = here(
    "euro_global", "output", "dapc", "MAF_1", "grp_far_east_euro.rds"
  )
)

Cross-validation: Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA, followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide: different numbers of axes are tried, and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and then used to predict the groups of the remaining 10% of observations. The current method uses the average prediction success per group (result=“groupMean”) or the overall prediction success (result=“overall”). The number of PCs associated with the lowest mean squared error is then retained in the DAPC.

# Cross-validate the number of PCs to keep: up to 200 PCs, 40 replicates per
# level, 90% of each group used for training. The data are already scaled
# (scaleGen above), hence scale = FALSE. The result is printed, not stored.
xvalDapc(
  snp3,
  grp = populations,
  n.pca.max = 200,
  n.pca = NULL,
  n.da = NULL,
  n.rep = 40,
  training.set = 0.9,
  result = "groupMean",
  center = TRUE,
  scale = FALSE,
  xval.plot = TRUE
)

$n.pca: 60 first PCs of PCA used $n.da: 36 discriminant functions saved $var (proportion of conserved variance): 0.426

Run dapc using these #s from CV

# Fit the DAPC with the values selected by cross-validation:
#   60 PCs retained, 36 discriminant functions retained.
# They are passed explicitly so dapc() does not stop to prompt for them,
# which keeps the call reproducible and runnable in a non-interactive session.
dapc1 <- dapc(snp3, grp = snp$pop, n.pca = 60, n.da = 36)

Save the DAPC object

# Cache the cross-validated DAPC fit.
saveRDS(
  object = dapc1,
  file = here(
    "scripts/RMarkdowns/output",
    "euro_global/dapc/MAF_1/far_east_euro",
    "dapc1.rds"
  )
)

Load the DAPC object

# Restore the cross-validated DAPC fit.
dapc1 <- readRDS(
  file = here(
    "scripts/RMarkdowns/output",
    "euro_global/dapc/MAF_1/far_east_euro",
    "dapc1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Estimate the optimal number of PCs from the a-score of dapc1.
# seq_len() is used instead of 1:ncol(...) so an empty table cannot yield the
# descending sequence c(1, 0).
# NOTE: the a-score is computed from random simulations (n.sim = 20), so
# $best may differ slightly from run to run.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 20
)

## $pop.score
## $pop.score$`1`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.54166667 -0.03000000  0.92083333  0.35833333  0.18750000  0.00000000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.57083333  0.47916667  0.75000000  0.14285714  0.56666667  0.90909091 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  1.00000000  0.00000000  0.00000000  0.66250000  0.00000000  0.00000000 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.05833333  0.00000000  0.33750000  0.00000000  0.00000000  0.47916667 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.58333333 -0.03636364  0.21666667  0.04166667  0.04166667  0.00000000 
##         SSK         SUF         SUU         TAI         TIK         UTS 
##  0.31666667  0.00000000  0.83333333  0.00000000  0.07916667  0.43333333 
##         YUN 
##  0.00000000 
## 
## $pop.score$`5`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.91666667  0.92500000  0.89166667  0.56666667  0.41666667 -0.10000000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.40416667  0.37083333  0.97500000  0.37142857  0.83750000  0.88636364 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.95000000 -0.07500000 -0.02500000  0.90416667  0.58636364 -0.06666667 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.36250000 -0.06250000  0.07083333  0.93750000 -0.03333333  0.62500000 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.92083333  0.90000000  0.03750000  0.68750000 -0.01666667 -0.10000000 
##         SSK         SUF         SUU         TAI         TIK         UTS 
##  0.70833333  0.29166667  0.97500000  0.92142857  0.25416667  0.86666667 
##         YUN 
## -0.05000000 
## 
## $pop.score$`10`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.75000000  0.85500000  0.88333333  0.65833333  0.65416667 -0.27500000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.53333333  0.45416667  0.80000000  0.64285714  0.94166667  0.81363636 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.82500000 -0.22500000 -0.08333333  0.87916667  0.61818182 -0.13333333 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.41666667  0.15000000 -0.03750000  0.81250000 -0.10000000  0.76250000 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.86250000  0.90000000  0.30416667  0.71666667  0.38750000 -0.20000000 
##         SSK         SUF         SUU         TAI         TIK         UTS 
##  0.53333333  0.21666667  0.94166667  0.90000000  0.70833333  0.91666667 
##         YUN 
## -0.05000000 
## 
## $pop.score$`15`
##        ALU        ARM        BEN        CAM        CHA        GEL        GES 
##  0.8125000  0.9050000  0.8375000  0.6333333  0.6458333  0.5750000  0.5375000 
##        HAI        HAN        HOC        HUN        INJ        INW        JAF 
##  0.4500000  0.7750000  0.6000000  0.8916667  0.9090909  0.7875000 -0.4000000 
##        KAC        KAG        KAN        KAT        KER        KLP        KRA 
## -0.2083333  0.9000000  0.9136364 -0.1416667  0.7125000  0.7500000  0.2958333 
##        KUN        LAM        MAT        OKI        QNC        RAR        SEV 
##  0.7625000  0.3388889  0.7166667  0.8416667  0.8727273  0.2708333  0.8666667 
##        SOC        SON        SSK        SUF        SUU        TAI        TIK 
##  0.5083333 -0.3000000  0.4250000  0.1583333  0.8583333  0.8785714  0.5291667 
##        UTS        YUN 
##  0.8333333  0.1000000 
## 
## $pop.score$`20`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.73333333  0.83500000  0.84166667  0.55833333  0.64583333  0.52500000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.43333333  0.50416667  0.68750000  0.70714286  0.85416667  0.81363636 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.66250000 -0.45000000 -0.09166667  0.87500000  0.88181818  0.43333333 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.69583333  0.68750000  0.22500000  0.75000000  0.40000000  0.67083333 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.86666667  0.82727273  0.69166667  0.88333333  0.44166667 -0.05000000 
##         SSK         SUF         SUU         TAI         TIK         UTS 
##  0.33750000  0.61666667  0.72500000  0.77142857  0.70416667  0.82916667 
##         YUN 
##  0.15000000 
## 
## $pop.score$`25`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.87083333  0.77500000  0.81666667  0.56666667  0.45416667  0.35000000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.37500000  0.59583333  0.61250000  0.74285714  0.80416667  0.85909091 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.55000000 -0.57500000  0.07500000  0.76666667  0.82727273  0.40833333 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.76250000  0.55000000  0.27916667  0.51250000  0.57777778  0.67083333 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.80000000  0.87727273  0.74166667  0.84583333  0.53333333 -0.06666667 
##         SSK         SUF         SUU         TAI         TIK         UTS 
##  0.40416667  0.55833333  0.76666667  0.80714286  0.72500000  0.79583333 
##         YUN 
##  0.11111111 
## 
## $pop.score$`30`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.82500000  0.81500000  0.78750000  0.56666667  0.56250000  0.40000000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.52916667  0.66666667  0.55000000  0.75000000  0.77916667  0.80454545 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.55000000 -0.12500000  0.12500000  0.76666667  0.78181818  0.28333333 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.69166667  0.22500000  0.40000000  0.62500000  0.59444444  0.70833333 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.74166667  0.82272727  0.72916667  0.82916667  0.71250000  0.15000000 
##         SSK         SUF         SUU         TAI         TIK         UTS 
##  0.41666667  0.50000000  0.69166667  0.66428571  0.79583333  0.80000000 
##         YUN 
##  0.08333333 
## 
## $pop.score$`35`
##       ALU       ARM       BEN       CAM       CHA       GEL       GES       HAI 
## 0.8208333 0.6900000 0.6958333 0.4875000 0.4666667 0.3250000 0.5833333 0.6625000 
##       HAN       HOC       HUN       INJ       INW       JAF       KAC       KAG 
## 0.5500000 0.6857143 0.7708333 0.7863636 0.5375000 0.3500000 0.1000000 0.7750000 
##       KAN       KAT       KER       KLP       KRA       KUN       LAM       MAT 
## 0.6863636 0.2416667 0.7500000 0.5625000 0.3958333 0.5875000 0.6833333 0.6041667 
##       OKI       QNC       RAR       SEV       SOC       SON       SSK       SUF 
## 0.7791667 0.7318182 0.6916667 0.7583333 0.6708333 0.1666667 0.2333333 0.4750000 
##       SUU       TAI       TIK       UTS       YUN 
## 0.6166667 0.6714286 0.7375000 0.7416667 0.1944444 
## 
## $pop.score$`40`
##       ALU       ARM       BEN       CAM       CHA       GEL       GES       HAI 
## 0.7500000 0.7150000 0.6583333 0.4958333 0.6625000 0.3750000 0.6666667 0.6500000 
##       HAN       HOC       HUN       INJ       INW       JAF       KAC       KAG 
## 0.4000000 0.6071429 0.7000000 0.7909091 0.4000000 0.4750000 0.0250000 0.7291667 
##       KAN       KAT       KER       KLP       KRA       KUN       LAM       MAT 
## 0.6863636 0.1833333 0.7125000 0.5125000 0.4458333 0.4750000 0.5888889 0.5583333 
##       OKI       QNC       RAR       SEV       SOC       SON       SSK       SUF 
## 0.7125000 0.7227273 0.7291667 0.7625000 0.6083333 0.1000000 0.2416667 0.3916667 
##       SUU       TAI       TIK       UTS       YUN 
## 0.6333333 0.5928571 0.7416667 0.6916667 0.4333333 
## 
## $pop.score$`45`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.63750000  0.65500000  0.49166667  0.43750000  0.62500000  0.27500000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.52916667  0.61666667  0.48750000  0.57142857  0.67916667  0.68181818 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.43750000  0.32500000  0.15000000  0.74583333  0.65909091  0.25833333 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.61250000  0.46250000  0.46250000  0.38750000  0.60555556  0.51666667 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.68750000  0.67272727  0.65000000  0.70833333  0.73750000 -0.01666667 
##         SSK         SUF         SUU         TAI         TIK         UTS 
## -0.05416667  0.37500000  0.55833333  0.60714286  0.76250000  0.68750000 
##         YUN 
##  0.41111111 
## 
## $pop.score$`50`
##           ALU           ARM           BEN           CAM           CHA 
##  6.750000e-01  6.250000e-01  5.375000e-01  4.458333e-01  5.875000e-01 
##           GEL           GES           HAI           HAN           HOC 
##  3.750000e-01  4.958333e-01  6.250000e-01  3.875000e-01  5.785714e-01 
##           HUN           INJ           INW           JAF           KAC 
##  6.333333e-01  6.727273e-01  4.000000e-01  3.000000e-01  3.500000e-01 
##           KAG           KAN           KAT           KER           KLP 
##  6.875000e-01  6.454545e-01  1.500000e-01  6.666667e-01  3.750000e-01 
##           KRA           KUN           LAM           MAT           OKI 
##  4.083333e-01  4.875000e-01  5.111111e-01  5.708333e-01  6.833333e-01 
##           QNC           RAR           SEV           SOC           SON 
##  5.863636e-01  7.416667e-01  6.833333e-01  6.375000e-01  3.333333e-01 
##           SSK           SUF           SUU           TAI           TIK 
## -1.387779e-17  4.250000e-01  4.000000e-01  5.000000e-01  6.833333e-01 
##           UTS           YUN 
##  6.625000e-01  3.722222e-01 
## 
## $pop.score$`55`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.64583333  0.60000000  0.54166667  0.37500000  0.36666667  0.27500000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.47083333  0.56666667  0.33750000  0.47857143  0.65833333  0.68181818 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.37500000  0.20000000 -0.05833333  0.60833333  0.64090909  0.06666667 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.62916667  0.33750000  0.51666667  0.41250000  0.42222222  0.54583333 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.64166667  0.63181818  0.60416667  0.70000000  0.66666667  0.30000000 
##         SSK         SUF         SUU         TAI         TIK         UTS 
## -0.07500000  0.34166667  0.45833333  0.52142857  0.57916667  0.65833333 
##         YUN 
##  0.30555556 
## 
## $pop.score$`60`
##         ALU         ARM         BEN         CAM         CHA         GEL 
##  0.62500000  0.55000000  0.65416667  0.38750000  0.52916667  0.22500000 
##         GES         HAI         HAN         HOC         HUN         INJ 
##  0.43333333  0.65833333  0.35000000  0.50714286  0.63750000  0.57272727 
##         INW         JAF         KAC         KAG         KAN         KAT 
##  0.40000000  0.25000000 -0.06666667  0.65416667  0.55000000  0.09166667 
##         KER         KLP         KRA         KUN         LAM         MAT 
##  0.61250000  0.37500000  0.46666667  0.38750000  0.41111111  0.52083333 
##         OKI         QNC         RAR         SEV         SOC         SON 
##  0.66250000  0.51363636  0.70000000  0.60000000  0.57916667  0.28333333 
##         SSK         SUF         SUU         TAI         TIK         UTS 
## -0.02916667  0.25833333  0.40833333  0.46428571  0.56666667  0.60000000 
##         YUN 
##  0.44444444 
## 
## 
## $mean
##         1         5        10        15        20        25        30        35 
## 0.2822680 0.4873581 0.5063201 0.5633220 0.5857783 0.5710142 0.5837699 0.5747829 
##        40        45        50        55        60 
## 0.5574249 0.5161408 0.5108050 0.4602204 0.4549779 
## 
## $pred
## $pred$x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
## 
## $pred$y
##  [1] 0.3255465 0.3531379 0.3801034 0.4058130 0.4296369 0.4510852 0.4702284
##  [8] 0.4872768 0.5024409 0.5159312 0.5279346 0.5385445 0.5478306 0.5558629
## [15] 0.5627113 0.5684469 0.5731475 0.5768919 0.5797591 0.5818282 0.5831878
## [22] 0.5839646 0.5842953 0.5843164 0.5841643 0.5839437 0.5836316 0.5831735
## [29] 0.5825146 0.5816002 0.5803809 0.5788284 0.5769197 0.5746318 0.5719418
## [36] 0.5688334 0.5653180 0.5614137 0.5571385 0.5525106 0.5475600 0.5423646
## [43] 0.5370139 0.5315979 0.5262061 0.5209039 0.5156588 0.5104140 0.5051126
## [50] 0.4996978 0.4941396 0.4885161 0.4829323 0.4774930 0.4723033 0.4674387
## [57] 0.4628576 0.4584889 0.4542615 0.4501044
## 
## 
## $best
## [1] 24
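#Optimal number = 22 (value used for the DAPC below; the run printed above reports $best = 24 because optim.a.score is simulation-based and the optimum can shift between runs)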

Run DAPC with object

# Final DAPC on the scaled data, with sampling populations as groups:
# 22 PCs and 7 discriminant functions retained.
dapc_snp <- dapc(
  snp3,
  grp = populations,
  n.pca = 22,
  n.da = 7
)

Save it

# Cache the final DAPC fit.
saveRDS(
  object = dapc_snp,
  file = here(
    "scripts/RMarkdowns/output",
    "euro_global/dapc/MAF_1/far_east_euro",
    "dapc_snp.rds"
  )
)

To load it

# Restore the final DAPC fit.
dapc_snp <- readRDS(
  file = here(
    "scripts/RMarkdowns/output",
    "euro_global/dapc/MAF_1/far_east_euro",
    "dapc_snp.rds"
  )
)

# Colour palette for the per-population scatter plots (39 entries).
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd",
  "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00",
  "#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415",
  "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310",
  "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange",
  "aquamarine3", "magenta", "gold4", "purple"
)

6.3 Plot using different discriminant functions

PCs 1 & 2

# Quick-look scatter of DAPC axes 1 (x) and 2 (y), with the DA eigenvalue
# inset (scree.da) and default group labels.
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  col = myCol, pch = 20, bg = "white",
  scree.da = TRUE,
  cex = 1, cex.lab = 0.1
)

# Write the per-population DAPC scatter (axes 1 vs 2) to a 7 x 7 inch PDF.
# The path is built with here() so it resolves from the project root, like
# the .rds files saved in the same directory, whatever the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "far_east_euro", "dapc_far_east_euro_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)

# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols on the PDF device; restored before closing it.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  posi.da = "bottomleft",
  legend = TRUE, posi.leg = "topleft",
  # scatter.dapc() sizes the legend with `cleg`; the former `cex.leg` is not
  # one of its arguments and only triggered a "not a graphical parameter"
  # warning.
  cleg = 1.0,
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

dev.off()
# On-screen version of the per-population scatter (DAPC axes 1 vs 2).
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols for this plot only; settings are restored afterwards.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  posi.da = "bottomleft",
  legend = TRUE, posi.leg = "topleft",
  # scatter.dapc() sizes the legend with `cleg`; the former `cex.leg` is not
  # one of its arguments and only triggered a "not a graphical parameter"
  # warning.
  cleg = 1.0,
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

# Write the DAPC scatter (axes 1 vs 2) to a 7 x 7 inch PDF, coloured with the
# four-colour myCol2 palette (presumably one colour per region -- confirm
# against the population order).
# The path is built with here() so it resolves from the project root, like
# the .rds files saved in the same directory, whatever the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "far_east_euro", "dapc_far_east_euro_region_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)

# 37 colours, one per group, in the order of the group levels.
myCol2 <- c(
  "goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45",
  "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9",
  "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45",
  "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9",
  "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9",
  "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C",
  "#c41A1C"
)

# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols on the PDF device; restored before closing it.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  posi.da = "bottomleft",
  legend = TRUE, posi.leg = "topleft",
  # scatter.dapc() sizes the legend with `cleg`; the former `cex.leg` is not
  # one of its arguments and only triggered a "not a graphical parameter"
  # warning.
  cleg = 1.0,
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

dev.off()
# On-screen version of the myCol2-coloured scatter (DAPC axes 1 vs 2).
# 37 colours, one per group, in the order of the group levels.
myCol2 <- c(
  "goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45",
  "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9",
  "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45",
  "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9",
  "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9",
  "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C",
  "#c41A1C"
)

# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols for this plot only; settings are restored afterwards.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 2,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  posi.da = "bottomleft",
  legend = TRUE, posi.leg = "topleft",
  # scatter.dapc() sizes the legend with `cleg`; the former `cex.leg` is not
  # one of its arguments and only triggered a "not a graphical parameter"
  # warning.
  cleg = 1.0,
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

# Write the DAPC scatter (axes 1 vs 3) to a 7 x 7 inch PDF, coloured with the
# four-colour myCol2 palette (presumably one colour per region -- confirm
# against the population order).
# The path is built with here() so it resolves from the project root, like
# the .rds files saved in the same directory, whatever the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "far_east_euro", "dapc_far_east_euro_region_PC1_3.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)

# 37 colours, one per group, in the order of the group levels.
myCol2 <- c(
  "goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45",
  "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9",
  "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45",
  "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9",
  "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9",
  "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C",
  "#c41A1C"
)

# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols on the PDF device; restored before closing it.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  posi.da = "bottomleft",
  legend = TRUE, posi.leg = "topleft",
  # scatter.dapc() sizes the legend with `cleg`; the former `cex.leg` is not
  # one of its arguments and only triggered a "not a graphical parameter"
  # warning.
  cleg = 1.0,
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

dev.off()
# On-screen version of the myCol2-coloured scatter (DAPC axes 1 vs 3).
# 37 colours, one per group, in the order of the group levels.
myCol2 <- c(
  "goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45",
  "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9",
  "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45",
  "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9",
  "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9",
  "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C",
  "#c41A1C"
)

# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols for this plot only; settings are restored afterwards.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  xax = 1, yax = 3,
  pch = good.shapes, col = myCol2,
  cstar = 0, cellipse = TRUE, mstree = FALSE, label = NULL,
  posi.da = "bottomleft",
  legend = TRUE, posi.leg = "topleft",
  # scatter.dapc() sizes the legend with `cleg`; the former `cex.leg` is not
  # one of its arguments and only triggered a "not a graphical parameter"
  # warning.
  cleg = 1.0,
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

7. DAPC for native_sicily_and_Americas.txt

Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)

Create files

# Work from the euro_global project directory; all paths below are relative to it.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
# Subset the r2_0.01_b SNP set to the families listed in
# native_sicily_and_Americas.txt and write a new binary fileset
# (--make-bed) under output/dapc/MAF_1/.
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_sicily_and_Americas.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_sicily_and_Americas \
--silent;
# --silent suppresses console output, so pull the summary counts from the log.
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_sicily_and_Americas.log

22642 variants loaded from .bim file. --keep-fam: 287 people remaining. Total genotyping rate in remaining samples is 0.967131. 22642 variants and 287 people pass filters and QC.

Convert to raw format

# Export the subset as a .raw file (--recodeA) for import into R.
# The run reuses the same --out prefix, so the .log read below is the one
# written by this command.
plink --bfile output/dapc/MAF_1/dapc_sicily_and_Americas \
  --allow-extra-chr \
  --keep-allele-order \
  --recodeA \
  --out output/dapc/MAF_1/dapc_sicily_and_Americas \
  --silent
grep -E 'samples|variants|remaining' output/dapc/MAF_1/dapc_sicily_and_Americas.log

22642 variants loaded from .bim file. 22642 variants and 287 people pass filters and QC.

Clean env & memory

# Remove all objects from the global environment so nothing from the previous
# dataset (snp, snp2, snp3, dapc_snp, palettes, ...) carries over into this
# section. Attached packages are not affected, so here(), read.PLINK(), etc.
# remain available.
rm(list = ls())

# Run the garbage collector to free up memory and report current usage
gc()
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  5970864 318.9   16401712  876.0  24799007 1324.5
## Vcells 10179893  77.7  347531961 2651.5 848466578 6473.3

Import the data and convert it to genind format

# Import the PLINK .raw export as a genlight object, reading the genotypes in
# chunks of 1000 individuals on up to 4 cores.
snp <- read.PLINK(
  here("euro_global/output/dapc/MAF_1/dapc_sicily_and_Americas.raw"),
  quiet = FALSE,
  chunkSize = 1000,
  # Use parallel reading only when the 'parallel' package is installed.
  # requireNamespace() checks availability without attaching the package to
  # the search path, unlike require(), which attaches it as a side effect.
  parallel = requireNamespace("parallel", quietly = TRUE),
  n.cores = 4
)
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
# Sanity-check the imported genlight object: number of individuals, loci and
# populations (expected to match the PLINK log: 287 people, 22642 variants).
nInd(snp)
## [1] 287
nLoc(snp)
## [1] 22642
nPop(snp)
## [1] 33
indNames(snp)
##   [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
##  [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
##  [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
##  [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
##  [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
##  [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
##  [61] "1214" "1215" "1216" "1217" "1226" "1227" "1228" "1229" "1230" "1232"
##  [71] "1233" "1234" "1237" "1238" "1239" "1240" "1241" "1242" "1243" "1244"
##  [81] "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254" "1255"
##  [91] "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264" "1265"
## [101] "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276" "1282"
## [111] "1283" "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331" "1332"
## [121] "1333" "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377" "1378"
## [131] "1379" "1380" "1381" "1382" "1383" "1384" "175"  "176"  "177"  "178" 
## [141] "180"  "181"  "182"  "183"  "184"  "185"  "186"  "2174" "2175" "2176"
## [151] "2177" "2178" "2179" "217"  "2180" "2181" "2182" "2183" "2184" "2185"
## [161] "218"  "219"  "220"  "221"  "222"  "223"  "224"  "225"  "226"  "227" 
## [171] "230"  "255"  "256"  "257"  "258"  "261"  "262"  "263"  "264"  "265" 
## [181] "266"  "267"  "268"  "269"  "270"  "271"  "272"  "273"  "275"  "276" 
## [191] "277"  "278"  "294"  "295"  "296"  "297"  "298"  "299"  "301"  "302" 
## [201] "303"  "304"  "305"  "435"  "436"  "437"  "438"  "439"  "440"  "441" 
## [211] "442"  "443"  "444"  "445"  "446"  "602"  "603"  "604"  "607"  "609" 
## [221] "610"  "623"  "624"  "625"  "626"  "627"  "628"  "629"  "630"  "631" 
## [231] "632"  "633"  "666"  "669"  "670"  "671"  "672"  "673"  "674"  "675" 
## [241] "676"  "677"  "678"  "679"  "680"  "681"  "682"  "683"  "684"  "685" 
## [251] "686"  "687"  "688"  "689"  "690"  "691"  "692"  "693"  "694"  "695" 
## [261] "964"  "965"  "966"  "967"  "972"  "973"  "975"  "976"  "977"  "978" 
## [271] "979"  "980"  "981"  "982"  "983"  "984"  "985"  "986"  "987"  "988" 
## [281] "989"  "990"  "991"  "992"  "993"  "994"  "995"
# Convert the genlight object to a genind object (dartR), with a progress bar.
snp2 <- gl2gi(
  x = snp,
  probar = TRUE,
  verbose = NULL
)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save it

# Cache snp2 on disk so the conversion above does not have to be re-run.
saveRDS(
  snp2,
  file = here("euro_global/output/dapc/MAF_1/snp2_sicily_Americas.rds")
)

To load it

# Restore the cached snp2 object from disk.
snp2 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp2_sicily_Americas.rds")
)

7.1 Scale

# Centre and scale the allele table (scaleGen defaults, spelled out here);
# missing values are replaced by the mean of the corresponding allele.
snp3 <- scaleGen(
  snp2,
  center = TRUE,
  scale = TRUE,
  NA.method = "mean"
)
## Warning in .local(x, ...): Some scaling values are null.
##  Corresponding alleles are removed.
# Check what kind of object scaleGen() returned.
class(snp3)
## [1] "matrix" "array"

Save it

# Cache the scaled matrix on disk.
saveRDS(
  snp3,
  file = here("euro_global/output/dapc/MAF_1/snp3_sicily_Americas.rds")
)

To load it

# Restore the cached scaled matrix and report its dimensions.
snp3 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp3_sicily_Americas.rds")
)
dim(snp3)
## [1]   287 45274
# Preview the first five rows and columns of the scaled matrix.
snp3[1:5,1:5]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001        0.9547918       -0.9547918        0.8231945       -0.8231945
## 1002        0.0000000        0.0000000       -1.5830663        1.5830663
## 1003       -0.3289235        0.3289235       -1.5830663        1.5830663
## 1004       -0.3289235        0.3289235       -1.5830663        1.5830663
## 1005       -0.3289235        0.3289235       -0.3799359        0.3799359
##      AX-583036983_C.T
## 1001       1.29864538
## 1002       0.04387315
## 1003       1.29864538
## 1004       0.04387315
## 1005       1.29864538
# Get the populations from the genlight object
# NOTE(review): `snp` is defined outside this section. These labels are used
# below as the grouping factor for snp3, so they presumably follow the same
# individual order as the rows of snp3 — confirm.
populations <- snp$pop

7.2 Find clusters

# Look for genetic clusters in the scaled data, testing up to 20 clusters.
# The answers previously typed at the interactive prompts (250 PCs retained,
# 6 clusters chosen) are passed explicitly so this step runs unattended, e.g.
# when knitting. Drop n.pca / n.clust to get the prompts and their diagnostic
# plots back.
grp <- find.clusters(
  snp3,
  max.n.clust = 20,
  n.pca = 250,  # retained 250
  n.clust = 6   # Choose the number of clusters (>=2): 6
)

Save it

# Cache the find.clusters() result on disk.
saveRDS(
  grp,
  file = here("euro_global/output/dapc/MAF_1/grp_sicily_Americas.rds")
)

To load it

# Restore the cached find.clusters() result.
grp <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/grp_sicily_Americas.rds")
)

Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and then used to predict the groups of the remaining 10% of observations. The current method uses the average prediction success per group (result = "groupMean"), or the overall prediction success (result = "overall"). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.

# Cross-validate the DAPC to choose how many PCs to retain: up to 200 PCs are
# tried, each fit is trained on 90% of the observations, with 40 replicates.
xvalDapc(
  snp3,
  grp = populations,
  n.pca.max = 200,
  n.da = NULL,
  training.set = 0.9,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  n.pca = NULL,
  n.rep = 40,
  xval.plot = TRUE
)

$n.pca: 40 first PCs of PCA used $n.da: 32 discriminant functions saved $var (proportion of conserved variance): 0.354

Run DAPC using the numbers of PCs and discriminant functions from cross-validation

# Initial DAPC using the values selected by cross-validation. They are passed
# explicitly (instead of being typed at dapc()'s interactive prompts) so the
# step runs unattended, and the grouping factor is the same `populations`
# vector used by the other DAPC calls.
dapc1 <- dapc(
  snp3,
  grp = populations,
  n.pca = 40,  # 40 PCs retained
  n.da = 32    # 32 discriminant functions retained
)

Save the DAPC object

# Cache the initial DAPC result on disk.
saveRDS(
  dapc1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc1.rds"
  )
)

Load the DAPC object

# Restore the cached initial DAPC result.
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Calculating the optimal number of PCs: the a-score is evaluated over the PCs
# retained in dapc1. seq_len() replaces 1:ncol() so the candidate range can
# never silently become c(1, 0).
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 20
)

## $pop.score
## $pop.score$`1`
##         BEN         BER         CAM         CHA         GEL         GRV 
##  0.95000000  0.80833333  0.39583333  0.10000000  0.00000000  0.44166667 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.22083333  0.75000000  0.14285714  0.54583333  0.89090909  1.00000000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
##  0.00000000  0.00000000  0.75416667  0.53636364  0.00000000  0.00000000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.00000000  0.00000000  0.42083333  0.65833333  0.52272727 -0.08181818 
##         REC         SIC         SON         SSK         SUF         SUU 
##  0.90454545  0.77777778  0.00000000  0.55833333  0.00000000  0.83333333 
##         TAI         UTS         YUN 
##  0.00000000  0.57083333  0.00000000 
## 
## $pop.score$`5`
##         BEN         BER         CAM         CHA         GEL         GRV 
##  0.87500000  0.87916667  0.47500000  0.39583333 -0.10000000  0.91666667 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.52500000  0.91250000  0.65714286  0.92500000  0.82727273  0.91250000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.10000000  0.00000000  0.89583333  0.62272727 -0.07500000 -0.01250000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  1.00000000 -0.03888889  0.59166667  0.82916667  0.94090909  0.93181818 
##         REC         SIC         SON         SSK         SUF         SUU 
##  0.92272727  0.91666667 -0.03333333  0.38750000  0.29166667  1.00000000 
##         TAI         UTS         YUN 
##  0.97142857  0.92083333 -0.01666667 
## 
## $pop.score$`10`
##         BEN         BER         CAM         CHA         GEL         GRV 
##  0.88333333  0.87916667  0.62500000  0.60416667 -0.27500000  0.87916667 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.72916667  0.87500000  0.93571429  0.85833333  0.78181818  0.88750000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.35000000 -0.10000000  0.83750000  0.70454545 -0.10833333  0.30000000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.88750000  0.27777778  0.71250000  0.86666667  0.87272727  0.85909091 
##         REC         SIC         SON         SSK         SUF         SUU 
##  0.91818182  0.96111111 -0.13333333  0.30833333  0.28333333  0.88333333 
##         TAI         UTS         YUN 
##  0.97142857  0.90416667 -0.05555556 
## 
## $pop.score$`15`
##         BEN         BER         CAM         CHA         GEL         GRV 
##  0.88750000  0.85833333  0.62916667  0.52083333  0.67500000  0.86250000 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.59166667  0.82500000  0.92857143  0.87500000  0.93636364  0.81250000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.47500000  0.03333333  0.89583333  0.91363636  0.51666667  0.83750000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.86250000  0.36666667  0.79583333  0.89166667  0.89545455  0.88636364 
##         REC         SIC         SON         SSK         SUF         SUU 
##  0.82272727  0.93888889  0.11666667  0.22500000  0.66666667  0.88333333 
##         TAI         UTS         YUN 
##  0.82857143  0.84166667  0.10555556 
## 
## $pop.score$`20`
##         BEN         BER         CAM         CHA         GEL         GRV 
##  0.87083333  0.81666667  0.57500000  0.47916667  0.47500000  0.92083333 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.57083333  0.73750000  0.87142857  0.85416667  0.89545455  0.68750000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.47500000  0.13333333  0.88750000  0.86363636  0.54166667  0.76250000 
##         KUN         LAM         MAT         OKI         PAL         QNC 
##  0.81250000  0.77777778  0.74166667  0.85833333  0.85000000  0.84545455 
##         REC         SIC         SON         SSK         SUF         SUU 
##  0.87272727  0.83333333  0.40000000  0.45416667  0.66666667  0.84166667 
##         TAI         UTS         YUN 
##  0.83571429  0.85000000  0.07222222 
## 
## $pop.score$`25`
##       BEN       BER       CAM       CHA       GEL       GRV       HAI       HAN 
## 0.8708333 0.8750000 0.5583333 0.6375000 0.4250000 0.8583333 0.6250000 0.7250000 
##       HOC       HUN       INJ       INW       JAF       KAC       KAG       KAN 
## 0.7428571 0.8583333 0.7954545 0.7000000 0.5250000 0.2500000 0.7708333 0.8409091 
##       KAT       KLP       KUN       LAM       MAT       OKI       PAL       QNC 
## 0.5166667 0.6500000 0.7000000 0.7166667 0.7208333 0.8541667 0.8409091 0.8636364 
##       REC       SIC       SON       SSK       SUF       SUU       TAI       UTS 
## 0.8090909 0.8388889 0.2833333 0.4416667 0.5750000 0.8333333 0.7714286 0.8541667 
##       YUN 
## 0.2666667 
## 
## $pop.score$`30`
##       BEN       BER       CAM       CHA       GEL       GRV       HAI       HAN 
## 0.7458333 0.8250000 0.4666667 0.6083333 0.5000000 0.8083333 0.7416667 0.6500000 
##       HOC       HUN       INJ       INW       JAF       KAC       KAG       KAN 
## 0.8214286 0.8125000 0.8090909 0.6250000 0.4250000 0.3833333 0.8375000 0.8090909 
##       KAT       KLP       KUN       LAM       MAT       OKI       PAL       QNC 
## 0.4000000 0.6375000 0.6000000 0.6611111 0.6166667 0.8000000 0.8590909 0.8318182 
##       REC       SIC       SON       SSK       SUF       SUU       TAI       UTS 
## 0.8136364 0.7833333 0.2166667 0.4125000 0.5583333 0.7500000 0.7500000 0.8333333 
##       YUN 
## 0.4277778 
## 
## $pop.score$`35`
##       BEN       BER       CAM       CHA       GEL       GRV       HAI       HAN 
## 0.5666667 0.8375000 0.5666667 0.8291667 0.4250000 0.8250000 0.7041667 0.6000000 
##       HOC       HUN       INJ       INW       JAF       KAC       KAG       KAN 
## 0.7642857 0.7791667 0.7863636 0.5375000 0.4250000 0.3750000 0.7875000 0.7909091 
##       KAT       KLP       KUN       LAM       MAT       OKI       PAL       QNC 
## 0.3916667 0.6125000 0.6125000 0.7388889 0.5291667 0.8125000 0.8181818 0.7863636 
##       REC       SIC       SON       SSK       SUF       SUU       TAI       UTS 
## 0.8090909 0.7500000 0.1666667 0.3208333 0.4916667 0.7250000 0.7428571 0.8666667 
##       YUN 
## 0.6833333 
## 
## $pop.score$`40`
##       BEN       BER       CAM       CHA       GEL       GRV       HAI       HAN 
## 0.6125000 0.8250000 0.5416667 0.7125000 0.3500000 0.7666667 0.7750000 0.5250000 
##       HOC       HUN       INJ       INW       JAF       KAC       KAG       KAN 
## 0.7000000 0.7500000 0.7590909 0.5375000 0.2500000 0.3333333 0.7708333 0.7590909 
##       KAT       KLP       KUN       LAM       MAT       OKI       PAL       QNC 
## 0.3416667 0.5250000 0.5250000 0.6944444 0.5333333 0.7583333 0.7909091 0.8181818 
##       REC       SIC       SON       SSK       SUF       SUU       TAI       UTS 
## 0.7318182 0.6666667 0.5166667 0.1375000 0.4250000 0.6333333 0.7142857 0.7833333 
##       YUN 
## 0.7444444 
## 
## 
## $mean
##         1         5        10        15        20        25        30        35 
## 0.3848999 0.5802314 0.5898285 0.6743020 0.6721288 0.6846922 0.6612286 0.6502356 
##        40 
## 0.6153969 
## 
## $pred
## $pred$x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 
## $pred$y
##  [1] 0.4343442 0.4570347 0.4793634 0.5009661 0.5214787 0.5406094 0.5583547
##  [8] 0.5747835 0.5899647 0.6039670 0.6168420 0.6285712 0.6391189 0.6484494
## [15] 0.6565269 0.6633374 0.6689545 0.6734733 0.6769891 0.6795972 0.6813837
## [22] 0.6823979 0.6826799 0.6822698 0.6812077 0.6795381 0.6773224 0.6746267
## [29] 0.6715169 0.6680587 0.6643099 0.6602942 0.6560273 0.6515247 0.6468021
## [36] 0.6418791 0.6367926 0.6315834 0.6262927 0.6209612
## 
## 
## $best
## [1] 23
# Optimal number of retained PCs = 23 (the $best value above)

Run DAPC with object

# Fit the DAPC on snp3 with the predefined `populations` vector as groups,
# keeping n.pca = 23 and n.da = 5.
dapc_snp <- dapc(
  snp3,
  n.pca = 23,
  n.da = 5,
  grp = populations
)

Save it

# Serialize the fitted DAPC object under the project root (resolved by here())
# so later sections can reload it instead of refitting.
saveRDS(
  object = dapc_snp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc_snp.rds"
  )
)

To load it

# Reload the DAPC object that was written with saveRDS().
dapc_snp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc_snp.rds"
  )
)

# Color palette handed to scatter() through `col` (39 entries: hex codes
# followed by a few named R colors).
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0",
  "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4",
  "#cf749b", "#FF7F00", "#2524f9", "#799d10", "#a7e831", "#984EA3",
  "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624",
  "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5",
  "#E41A1C", "#BC80BD", "#E7297A",
  "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple"
)

7.3 Plot with new colors - by region

PCs 1 & 2

# Write the DAPC scatter for axes 1 and 2 (colored with myCol) to a PDF.
# The path is built with here() so it resolves from the project root, the same
# way the dapc_snp.rds path is built, instead of depending on the working
# directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc_snp_sicily_Americas_PC1_2.pdf"
  ),
  width = 7,  # width of the plot in inches
  height = 7  # height of the plot in inches
)

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols for this plot; the previous settings are kept in
# `op` and put back once the plot is drawn.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)
par(op)

# Close the PDF device so the file is flushed to disk.
dev.off()
# Draw the axes 1 vs 2 scatter (colored with myCol) on the current device.

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols for this plot only; `op` holds the previous
# settings, which are restored after drawing so later plots are unaffected.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)
par(op)

PCs 1 & 2 - colored by region

# Write the DAPC scatter for axes 1 and 2, colored with myCol2, to a PDF.
# The path is built with here() so it resolves from the project root, the same
# way the dapc_snp.rds path is built, instead of depending on the working
# directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc_snp_sicily_Americas_region_PC1_2.pdf"
  ),
  width = 7,  # width of the plot in inches
  height = 7  # height of the plot in inches
)

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:24, 35:38, 40, 1:25)

# 33 colors passed via `col`; hex codes repeat so that several groups share a
# color (presumably one color per region, per the section heading).
myCol2 <- c(
  "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7",
  "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9",
  "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9",
  "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9",
  "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9",
  "#c41A1C", "#c41A1C", "#c41A1C"
)

# Shrink text and symbols for this plot; the previous settings are kept in
# `op` and put back once the plot is drawn.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)
par(op)

# Close the PDF device so the file is flushed to disk.
dev.off()
# Draw the axes 1 vs 2 scatter, colored with myCol2, on the current device.

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:24, 35:38, 40, 1:25)

# 33 colors passed via `col`; hex codes repeat so that several groups share a
# color (presumably one color per region, per the section heading).
myCol2 <- c(
  "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7",
  "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9",
  "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9",
  "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9",
  "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9",
  "#c41A1C", "#c41A1C", "#c41A1C"
)

# Shrink text and symbols for this plot only; `op` holds the previous
# settings, which are restored after drawing so later plots are unaffected.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)
par(op)

PCs 1 & 3

# Write the DAPC scatter for axes 1 and 3, colored with myCol2, to a PDF.
# The path is built with here() so it resolves from the project root, the same
# way the dapc_snp.rds path is built, instead of depending on the working
# directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc_snp_sicily_Americas_region_PC1_3.pdf"
  ),
  width = 7,  # width of the plot in inches
  height = 7  # height of the plot in inches
)

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:24, 35:38, 40, 1:25)

# 33 colors passed via `col`; hex codes repeat so that several groups share a
# color (presumably one color per region, per the section heading).
myCol2 <- c(
  "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7",
  "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9",
  "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9",
  "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9",
  "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9",
  "#c41A1C", "#c41A1C", "#c41A1C"
)

# Shrink text and symbols for this plot; the previous settings are kept in
# `op` and put back once the plot is drawn.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 3
)
par(op)

# Close the PDF device so the file is flushed to disk.
dev.off()
# Draw the axes 1 vs 3 scatter, colored with myCol2, on the current device.

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:24, 35:38, 40, 1:25)

# 33 colors passed via `col`; hex codes repeat so that several groups share a
# color (presumably one color per region, per the section heading).
myCol2 <- c(
  "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7",
  "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9",
  "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9",
  "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9",
  "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9",
  "#c41A1C", "#c41A1C", "#c41A1C"
)

# Shrink text and symbols for this plot only; `op` holds the previous
# settings, which are restored after drawing so later plots are unaffected.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 3
)
par(op)

PCs 1 & 4

# Write the DAPC scatter for axes 1 and 4, colored with myCol2, to a PDF.
# The path is built with here() so it resolves from the project root, the same
# way the dapc_snp.rds path is built, instead of depending on the working
# directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "sicily_americas", "dapc_snp_sicily_Americas_region_PC1_4.pdf"
  ),
  width = 7,  # width of the plot in inches
  height = 7  # height of the plot in inches
)

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:24, 35:38, 40, 1:25)

# 33 colors passed via `col`; hex codes repeat so that several groups share a
# color (presumably one color per region, per the section heading).
myCol2 <- c(
  "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7",
  "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9",
  "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9",
  "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9",
  "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9",
  "#c41A1C", "#c41A1C", "#c41A1C"
)

# Shrink text and symbols for this plot; the previous settings are kept in
# `op` and put back once the plot is drawn.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 4
)
par(op)

# Close the PDF device so the file is flushed to disk.
dev.off()
# Draw the axes 1 vs 4 scatter, colored with myCol2, on the current device.

# Plotting-symbol codes passed to scatter() via `pch`; the sequence restarts
# at 1 after the first 29 entries.
good.shapes <- c(1:24, 35:38, 40, 1:25)

# 33 colors passed via `col`; hex codes repeat so that several groups share a
# color (presumably one color per region, per the section heading).
myCol2 <- c(
  "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7",
  "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9",
  "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9",
  "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9",
  "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9",
  "#c41A1C", "#c41A1C", "#c41A1C"
)

# Shrink text and symbols for this plot only; `op` holds the previous
# settings, which are restored after drawing so later plots are unaffected.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 4
)
par(op)

8. DAPC for native_turkey_iberia_US.txt

Using SNP Set 3 - r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)

Create files

# Subset the r2_0.01_b fileset to the families listed in
# native_turkey_iberia_US.txt, write a new binary fileset for the DAPC,
# then print the sample/variant summary lines from the PLINK log.
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink --allow-extra-chr --keep-allele-order \
  --bfile output/snps_sets/r2_0.01_b \
  --keep-fam output/neuroadmixture/native_turkey_iberia_US.txt \
  --make-bed \
  --out output/dapc/MAF_1/dapc_native_turkey_iberia_US \
  --silent
grep -E 'samples|variants|remaining' output/dapc/MAF_1/dapc_native_turkey_iberia_US.log

22642 variants loaded from .bim file. --keep-fam: 329 people remaining. Total genotyping rate in remaining samples is 0.969285. 22642 variants and 329 people pass filters and QC.

Convert to raw format

# Recode the subset fileset to PLINK raw format (same prefix for input and
# output), then print the sample/variant summary lines from the PLINK log.
plink --allow-extra-chr --keep-allele-order \
  --bfile output/dapc/MAF_1/dapc_native_turkey_iberia_US \
  --recodeA \
  --out output/dapc/MAF_1/dapc_native_turkey_iberia_US \
  --silent
grep -E 'samples|variants|remaining' output/dapc/MAF_1/dapc_native_turkey_iberia_US.log

22642 variants loaded from .bim file. 22642 variants and 287 people pass filters and QC.

Clean env & memory

# Remove all objects from the environment
# NOTE(review): this also drops dapc_snp, myCol and myCol2 created earlier;
# anything needed later must be re-created or reloaded (e.g. via readRDS()).
# Attached packages are not affected by rm().
rm(list = ls())

# Run the garbage collector to free up memory
# (the printed table reports memory in use after collection).
gc()
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  5971163 318.9   16401712  876.0  24799007 1324.5
## Vcells 10186560  77.8  278025569 2121.2 848466578 6473.3

Import the data and convert it to genind format

# Import the PLINK .raw file as a genlight object, reading the genotypes in
# chunks of 1000 and using 4 cores.
snp <-
  read.PLINK(
    here("euro_global/output/dapc/MAF_1/dapc_native_turkey_iberia_US.raw"),
    quiet = FALSE,
    chunkSize = 1000,
    # requireNamespace() only checks that 'parallel' can be loaded; require()
    # would also attach it to the search path as a side effect.
    parallel = requireNamespace("parallel", quietly = TRUE),
    n.cores = 4
  )
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
nInd(snp)
## [1] 329
nLoc(snp)
## [1] 22642
nPop(snp)
## [1] 39
indNames(snp)
##   [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
##  [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
##  [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
##  [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
##  [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
##  [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
##  [61] "1214" "1215" "1216" "1217" "1238" "1239" "1240" "1241" "1242" "1243"
##  [71] "1244" "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254"
##  [81] "1255" "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264"
##  [91] "1265" "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276"
## [101] "1282" "1283" "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331"
## [111] "1332" "1333" "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377"
## [121] "1378" "1379" "1380" "1381" "1382" "1383" "1384" "2174" "2175" "2176"
## [131] "2177" "2178" "2179" "217"  "2180" "2181" "2182" "2183" "2184" "2185"
## [141] "218"  "219"  "220"  "221"  "222"  "223"  "224"  "225"  "226"  "227" 
## [151] "230"  "255"  "256"  "257"  "258"  "261"  "262"  "263"  "264"  "265" 
## [161] "266"  "267"  "268"  "269"  "270"  "271"  "272"  "273"  "275"  "276" 
## [171] "277"  "278"  "279"  "280"  "281"  "282"  "283"  "284"  "285"  "286" 
## [181] "287"  "289"  "290"  "291"  "294"  "295"  "296"  "297"  "298"  "299" 
## [191] "301"  "302"  "303"  "304"  "305"  "435"  "436"  "437"  "438"  "439" 
## [201] "440"  "441"  "442"  "443"  "444"  "445"  "446"  "602"  "603"  "604" 
## [211] "607"  "609"  "610"  "623"  "624"  "625"  "626"  "627"  "628"  "629" 
## [221] "630"  "631"  "632"  "633"  "666"  "669"  "670"  "671"  "672"  "673" 
## [231] "674"  "675"  "676"  "677"  "678"  "679"  "680"  "681"  "682"  "683" 
## [241] "765"  "766"  "769"  "770"  "771"  "772"  "773"  "774"  "775"  "776" 
## [251] "777"  "778"  "781"  "782"  "784"  "785"  "786"  "787"  "788"  "789" 
## [261] "790"  "791"  "792"  "793"  "794"  "795"  "835"  "836"  "837"  "838" 
## [271] "839"  "840"  "841"  "842"  "843"  "844"  "845"  "846"  "847"  "848" 
## [281] "877"  "878"  "879"  "880"  "881"  "882"  "883"  "884"  "885"  "886" 
## [291] "887"  "888"  "889"  "890"  "891"  "892"  "893"  "894"  "911"  "912" 
## [301] "913"  "915"  "964"  "965"  "966"  "967"  "972"  "973"  "975"  "976" 
## [311] "977"  "978"  "979"  "980"  "981"  "982"  "983"  "984"  "985"  "986" 
## [321] "987"  "988"  "989"  "990"  "991"  "992"  "993"  "994"  "995"
# Convert the genlight object to a genind object with dartR's gl2gi().
# NOTE(review): probar = TRUE writes a text progress bar into the knitted
# output on every update; consider turning it off when rendering.
snp2 <- gl2gi(
  snp,
  probar = TRUE,
  verbose = NULL
)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save it

# Cache the genind object produced by gl2gi so the conversion need not be rerun
saveRDS(
  object = snp2,
  file = here("euro_global/output/dapc/MAF_1/snp2_iberia_turkey_US.rds")
)

To load it

# Restore the cached genind object from disk
snp2 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp2_iberia_turkey_US.rds")
)

8.1 Scale

# Scale the allele data; missing values are replaced using NA.method = "mean"
snp3 <- scaleGen(snp2, NA.method = "mean")

# Show what kind of object scaleGen() returned
class(snp3)

Save it

# Cache the scaled data so scaleGen() need not be rerun
saveRDS(
  object = snp3,
  file = here("euro_global/output/dapc/MAF_1/snp3_iberia_turkey_US.rds")
)

To load it

# Restore the cached scaled data from disk
snp3 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp3_iberia_turkey_US.rds")
)

# Sanity check: rows are individuals, columns are alleles
dim(snp3)
## [1]   329 45278
snp3[1:5,1:5]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001        1.0423735       -1.0423735        0.8067209       -0.8067209
## 1002        0.0000000        0.0000000       -1.7420494        1.7420494
## 1003       -0.2605934        0.2605934       -1.7420494        1.7420494
## 1004       -0.2605934        0.2605934       -1.7420494        1.7420494
## 1005       -0.2605934        0.2605934       -0.4676643        0.4676643
##      AX-583036983_C.T
## 1001       1.19822555
## 1002      -0.09459675
## 1003       1.19822555
## 1004      -0.09459675
## 1005       1.19822555
# Population label of each individual, read from the genlight object `snp`
populations <- pop(snp)

8.2 Find clusters

# Identify genetic clusters in the scaled data, testing up to 20 clusters.
# n.pca and n.clust are fixed to the values recorded from the original
# interactive session (300 PCs retained, 5 clusters chosen) so this step runs
# unattended and reproducibly. Drop both arguments to be prompted again and
# inspect the diagnostic plots interactively.
grp <- find.clusters(snp3, max.n.clust = 20, n.pca = 300, n.clust = 5)

Save it

# Cache the find.clusters() result
saveRDS(
  object = grp,
  file = here("euro_global/output/dapc/MAF_1/grp_iberia_turkey_US.rds")
)

To load it

# Restore the cached find.clusters() result
grp <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/grp_iberia_turkey_US.rds")
)

Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and then used to predict the groups of the remaining 10% of observations. The current method uses either the average prediction success per group (result = "groupMean") or the overall prediction success (result = "overall"). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.

# Cross-validate the number of PCs to retain in the DAPC:
# up to 200 PCs are tried, with 40 replicates and a 90% training set.
xvalDapc(
  snp3,
  populations,
  n.pca.max = 200,
  n.da = NULL,
  training.set = 0.9,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  n.pca = NULL,
  n.rep = 40,
  xval.plot = TRUE
)

Cross-validation result: $n.pca: 40 first PCs of PCA used; $n.da: 38 discriminant functions saved; $var (proportion of conserved variance): 0.346

Run DAPC using the numbers of PCs and discriminant functions obtained from cross-validation

# Fit the DAPC with the cross-validated settings reported above:
#   PCs retained:                    40
#   discriminant functions retained: 38
# Passing them explicitly avoids the interactive prompts, which cannot be
# answered when the document is knitted, and records the choice in the code.
dapc1 <- dapc(snp3, snp$pop, n.pca = 40, n.da = 38)

Save the DAPC object

# Cache the fitted DAPC object
saveRDS(
  object = dapc1,
  file = here(
    "scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc1.rds"
  )
)

Load the DAPC object

# Restore the cached DAPC object
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Estimate the optimal number of retained PCs from the a-score.
# Candidate values span every PC kept in dapc1; seq_len() is used instead of
# 1:ncol() so an empty range can never silently become c(1, 0).
# With smart = TRUE only a subset of the candidates is evaluated (see the
# $pop.score names in the output) and the rest is interpolated.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 20
)

## $pop.score
## $pop.score$`1`
##          BAR          BEN          BER          CAM          CHA          GEL 
##  0.600000000  0.845833333  0.658333333  0.491666667  0.241666667  0.000000000 
##          HAI          HAN          HOC          HUN          INJ          INW 
##  0.562500000  1.000000000  0.142857143  0.708333333  0.881818182  1.000000000 
##          JAF          KAC          KAG          KAN          KAT          KLP 
##  0.000000000  0.000000000  0.404166667  0.545454545  0.000000000  0.000000000 
##          KUN          LAM          MAT          OKI          PAL          POL 
##  0.000000000  0.000000000  0.516666667  0.679166667  0.527272727  0.000000000 
##          POP          QNC          SON          SPB          SPC          SPM 
## -0.062500000 -0.095454545  0.000000000  0.500000000  0.000000000  0.000000000 
##          SPS          SSK          SUF          SUU          TAI          TUA 
##  0.000000000  0.220833333  0.000000000  0.833333333  0.000000000 -0.005555556 
##          TUH          UTS          YUN 
##  0.158333333  0.520833333  0.000000000 
## 
## $pop.score$`5`
##         BAR         BEN         BER         CAM         CHA         GEL 
##  0.88333333  0.86250000  0.60833333  0.39166667  0.38333333 -0.10000000 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.42083333  0.97500000  0.14285714  0.89166667  0.94090909  0.90000000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.12500000 -0.04166667  0.93333333  0.55000000 -0.07500000 -0.07500000 
##         KUN         LAM         MAT         OKI         PAL         POL 
##  0.97500000 -0.07222222  0.58333333  0.55000000  0.90000000 -0.15000000 
##         POP         QNC         SON         SPB         SPC         SPM 
##  0.40416667  0.93181818 -0.06666667  0.70625000  0.08333333 -0.04000000 
##         SPS         SSK         SUF         SUU         TAI         TUA 
##  0.68750000  0.62083333  0.30833333  0.96666667  0.55000000  0.08333333 
##         TUH         UTS         YUN 
##  0.45833333  0.87500000 -0.09444444 
## 
## $pop.score$`10`
##         BAR         BEN         BER         CAM         CHA         GEL 
##  0.86250000  0.91250000  0.93750000  0.53750000  0.66250000 -0.22500000 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.66250000  0.86250000  0.61428571  0.87500000  0.79545455  0.83750000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.30000000 -0.02500000  0.91666667  0.66363636 -0.05000000  0.17500000 
##         KUN         LAM         MAT         OKI         PAL         POL 
##  0.85000000 -0.03888889  0.77083333  0.86666667  0.90454545  0.72500000 
##         POP         QNC         SON         SPB         SPC         SPM 
##  0.78333333  0.94090909 -0.23333333  0.81875000  0.60833333 -0.12000000 
##         SPS         SSK         SUF         SUU         TAI         TUA 
##  0.75625000  0.59583333  0.23333333  0.90000000  0.93571429  0.05555556 
##         TUH         UTS         YUN 
##  0.50833333  0.87500000 -0.08888889 
## 
## $pop.score$`15`
##        BAR        BEN        BER        CAM        CHA        GEL        HAI 
##  0.9208333  0.9208333  0.9375000  0.6208333  0.5291667  0.6500000  0.6875000 
##        HAN        HOC        HUN        INJ        INW        JAF        KAC 
##  0.8625000  0.7642857  0.8875000  0.9045455  0.8000000 -0.3000000 -0.0750000 
##        KAG        KAN        KAT        KLP        KUN        LAM        MAT 
##  0.8958333  0.8227273 -0.1166667  0.5625000  0.7500000  0.1277778  0.6875000 
##        OKI        PAL        POL        POP        QNC        SON        SPB 
##  0.8041667  0.9227273  0.6000000  0.7083333  0.9090909 -0.2000000  0.8062500 
##        SPC        SPM        SPS        SSK        SUF        SUU        TAI 
##  0.5416667 -0.1900000  0.7750000  0.4666667  0.3916667  0.8750000  0.8285714 
##        TUA        TUH        UTS        YUN 
##  0.3611111  0.6750000  0.9291667  0.2888889 
## 
## $pop.score$`20`
##         BAR         BEN         BER         CAM         CHA         GEL 
##  0.92083333  0.90000000  0.90416667  0.62916667  0.65833333  0.60000000 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.57916667  0.76250000  0.85000000  0.89166667  0.83636364  0.72500000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
## -0.40000000 -0.20833333  0.87500000  0.87272727  0.50000000  0.76250000 
##         KUN         LAM         MAT         OKI         PAL         POL 
##  0.81250000  0.66666667  0.77916667  0.90000000  0.92727273  0.52500000 
##         POP         QNC         SON         SPB         SPC         SPM 
##  0.71666667  0.89090909 -0.03333333  0.58125000  0.33333333 -0.15000000 
##         SPS         SSK         SUF         SUU         TAI         TUA 
##  0.87500000  0.32083333  0.65833333  0.81666667  0.79285714  0.56666667 
##         TUH         UTS         YUN 
##  0.79583333  0.85416667  0.32777778 
## 
## $pop.score$`25`
##         BAR         BEN         BER         CAM         CHA         GEL 
##  0.89583333  0.85000000  0.92083333  0.61666667  0.58750000  0.45000000 
##         HAI         HAN         HOC         HUN         INJ         INW 
##  0.64583333  0.71250000  0.87142857  0.85833333  0.90909091  0.66250000 
##         JAF         KAC         KAG         KAN         KAT         KLP 
##  0.00000000 -0.03333333  0.89583333  0.84090909  0.46666667  0.76250000 
##         KUN         LAM         MAT         OKI         PAL         POL 
##  0.78750000  0.75555556  0.70833333  0.83750000  0.86818182  0.35000000 
##         POP         QNC         SON         SPB         SPC         SPM 
##  0.70416667  0.86363636  0.23333333  0.61875000  0.28333333 -0.27000000 
##         SPS         SSK         SUF         SUU         TAI         TUA 
##  0.83125000  0.44583333  0.59166667  0.81666667  0.80000000  0.35555556 
##         TUH         UTS         YUN 
##  0.68750000  0.87916667  0.20000000 
## 
## $pop.score$`30`
##        BAR        BEN        BER        CAM        CHA        GEL        HAI 
##  0.8291667  0.7166667  0.8541667  0.5958333  0.5500000  0.3500000  0.7833333 
##        HAN        HOC        HUN        INJ        INW        JAF        KAC 
##  0.6125000  0.7571429  0.8000000  0.8090909  0.6625000  0.4500000  0.4333333 
##        KAG        KAN        KAT        KLP        KUN        LAM        MAT 
##  0.8875000  0.8181818  0.4500000  0.6000000  0.6500000  0.5888889  0.7375000 
##        OKI        PAL        POL        POP        QNC        SON        SPB 
##  0.9041667  0.8409091  0.4250000  0.7208333  0.8500000  0.2166667  0.5750000 
##        SPC        SPM        SPS        SSK        SUF        SUU        TAI 
##  0.2166667 -0.2800000  0.8187500  0.3583333  0.6333333  0.7916667  0.8142857 
##        TUA        TUH        UTS        YUN 
##  0.5444444  0.6708333  0.8666667  0.4722222 
## 
## $pop.score$`35`
##        BAR        BEN        BER        CAM        CHA        GEL        HAI 
##  0.8583333  0.7708333  0.8333333  0.5041667  0.6708333  0.5750000  0.7750000 
##        HAN        HOC        HUN        INJ        INW        JAF        KAC 
##  0.5500000  0.7571429  0.7833333  0.7954545  0.6875000  0.3250000  0.3416667 
##        KAG        KAN        KAT        KLP        KUN        LAM        MAT 
##  0.8666667  0.8090909  0.4416667  0.5875000  0.5250000  0.6833333  0.6750000 
##        OKI        PAL        POL        POP        QNC        SON        SPB 
##  0.8250000  0.8227273  0.3750000  0.6458333  0.8272727  0.1666667  0.4750000 
##        SPC        SPM        SPS        SSK        SUF        SUU        TAI 
##  0.1916667 -0.1200000  0.8125000  0.2958333  0.5833333  0.6500000  0.6928571 
##        TUA        TUH        UTS        YUN 
##  0.3833333  0.7041667  0.8541667  0.5833333 
## 
## $pop.score$`40`
##        BAR        BEN        BER        CAM        CHA        GEL        HAI 
##  0.7833333  0.6458333  0.8000000  0.6166667  0.7250000  0.3500000  0.7500000 
##        HAN        HOC        HUN        INJ        INW        JAF        KAC 
##  0.5000000  0.6714286  0.8416667  0.7409091  0.5750000  0.3750000  0.3500000 
##        KAG        KAN        KAT        KLP        KUN        LAM        MAT 
##  0.7958333  0.7954545  0.3583333  0.5625000  0.5625000  0.5500000  0.5500000 
##        OKI        PAL        POL        POP        QNC        SON        SPB 
##  0.8333333  0.8045455  0.2750000  0.6708333  0.7590909  0.1500000  0.5875000 
##        SPC        SPM        SPS        SSK        SUF        SUU        TAI 
##  0.1416667 -0.2400000  0.6312500  0.2708333  0.5083333  0.6250000  0.6857143 
##        TUA        TUH        UTS        YUN 
##  0.3277778  0.7125000  0.7458333  0.5000000 
## 
## 
## $mean
##         1         5        10        15        20        25        30        35 
## 0.3045015 0.4545556 0.5477519 0.5982943 0.6311963 0.6220776 0.6250149 0.6047319 
##        40 
## 0.5612480 
## 
## $pred
## $pred$x
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 
## $pred$y
##  [1] 0.3045015 0.3457581 0.3855216 0.4222886 0.4545556 0.4812350 0.5029027
##  [8] 0.5205503 0.5351694 0.5477519 0.5591570 0.5697155 0.5796256 0.5890857
## [15] 0.5982943 0.6073191 0.6157061 0.6228710 0.6282293 0.6311963 0.6314468
## [22] 0.6296908 0.6268978 0.6240370 0.6220776 0.6217111 0.6225186 0.6238029
## [29] 0.6248673 0.6250149 0.6236858 0.6208680 0.6166864 0.6112661 0.6047319
## [36] 0.5972212 0.5889195 0.5800248 0.5707350 0.5612480
## 
## 
## $best
## [1] 21
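# Number of PCs used below = 30. NOTE(review): the run printed above reports $best = 21;
# a-scores are simulation-based and can differ between runs, so confirm which value is intended.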

Run DAPC with object

# Final DAPC: 30 PCs and 6 discriminant functions, grouped by population.
# The result is stored as `dapc_snp`, the name that is saved, reloaded and
# plotted below. It was previously assigned to `dapc_snp1`, which nothing else
# uses, so the saveRDS() call wrote a missing or stale `dapc_snp` instead.
dapc_snp <- dapc(snp3, n.pca = 30, n.da = 6, grp = populations)

Save it

# Cache the final DAPC object
saveRDS(
  object = dapc_snp,
  file = here(
    "scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc_snp.rds"
  )
)

To load it

# Restore the cached final DAPC object
dapc_snp <- readRDS(
  file = here(
    "scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc_snp.rds"
  )
)

# Colour palette for the scatter plots: 39 distinct colours
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0",
  "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4",
  "#cf749b", "#FF7F00", "#2524f9", "#799d10", "#a7e831", "#984EA3",
  "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624",
  "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5",
  "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3",
  "magenta", "gold4", "purple"
)

8.3 Plot with new colors - by region

PCs 1 & 2

# Write the DAPC scatter plot (axes 1 and 2) to a 7 x 7 inch PDF.
# The path is built with here(), like the saveRDS()/readRDS() calls, so the
# file lands next to dapc_snp.rds regardless of the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "iberia_turkey_US", "dapc_snp_iberia_turkey_US_PC1_2.pdf"
  ),
  width = 7,  # plot width in inches
  height = 7  # plot height in inches
)

# Plotting symbols; codes 35:38 are the characters '#', '$', '%' and '&'
good.shapes <- c(1:25, 35:38, 1:25)

# Shrink text and symbols; the previous settings are kept in `op`
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)
par(op)  # restore the graphical parameters changed above

dev.off()
# Same plot as the PDF version, drawn on the current device (axes 1 and 2)
good.shapes <- c(1:25, 35:38, 1:25)

op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)

PCs 1 & 2 - colored by region

# Write the region-coloured DAPC scatter plot (axes 1 and 2) to a 7 x 7 inch PDF.
# The path is built with here(), like the saveRDS()/readRDS() calls, so the
# file lands next to dapc_snp.rds regardless of the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "iberia_turkey_US", "dapc_snp_iberia_turkey_US_region_PC1_2.pdf"
  ),
  width = 7,  # plot width in inches
  height = 7  # plot height in inches
)

# 39 plotting symbols; codes 35:38 are the characters '#', '$', '%' and '&'
good.shapes <- c(1:25, 35:38, 3:4, 6:7, 10:14, 17)

# 39 colours; repeated colours group populations (by region, per the section heading)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C",  "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9",  "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")

# Shrink text and symbols; the previous settings are kept in `op`
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)
par(op)  # restore the graphical parameters changed above

dev.off()
# Same region-coloured plot as the PDF version, drawn on the current device (axes 1 and 2)
good.shapes <- c(1:25, 35:38, 3:4, 6:7, 10:14, 17)

# 39 colours; repeated colours group populations (by region, per the section heading)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C",  "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9",  "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")

op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 2
)

PCs 1 & 3

# Write the region-coloured DAPC scatter plot (axes 1 and 3) to a 7 x 7 inch PDF.
# The path is built with here(), like the saveRDS()/readRDS() calls, so the
# file lands next to dapc_snp.rds regardless of the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "iberia_turkey_US", "dapc_snp_iberia_turkey_US_region_PC1_3.pdf"
  ),
  width = 7,  # plot width in inches
  height = 7  # plot height in inches
)

# 39 plotting symbols; codes 35:38 are the characters '#', '$', '%' and '&'
good.shapes <- c(1:25, 35:38, 3:4, 6:7, 10:14, 17)

# 39 colours; repeated colours group populations (by region, per the section heading)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C",  "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9",  "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")

# Shrink text and symbols; the previous settings are kept in `op`
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 3
)
par(op)  # restore the graphical parameters changed above

dev.off()
# Same region-coloured plot as the PDF version, drawn on the current device (axes 1 and 3)
good.shapes <- c(1:25, 35:38, 3:4, 6:7, 10:14, 17)

# 39 colours; repeated colours group populations (by region, per the section heading)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C",  "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9",  "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")

op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 3
)

PCs 1 & 4

# Write the region-coloured DAPC scatter plot (axes 1 and 4) to a 7 x 7 inch PDF.
# The path is built with here(), like the saveRDS()/readRDS() calls, so the
# file lands next to dapc_snp.rds regardless of the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "iberia_turkey_US", "dapc_snp_iberia_turkey_US_region_PC1_4.pdf"
  ),
  width = 7,  # plot width in inches
  height = 7  # plot height in inches
)

# 39 plotting symbols; codes 35:38 are the characters '#', '$', '%' and '&'
good.shapes <- c(1:25, 35:38, 3:4, 6:7, 10:14, 17)

# 39 colours; repeated colours group populations (by region, per the section heading)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C",  "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9",  "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")

# Shrink text and symbols; the previous settings are kept in `op`
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 4
)
par(op)  # restore the graphical parameters changed above

dev.off()
# Same region-coloured plot as the PDF version, drawn on the current device (axes 1 and 4)
good.shapes <- c(1:25, 35:38, 3:4, 6:7, 10:14, 17)

# 39 colours; repeated colours group populations (by region, per the section heading)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C",  "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9",  "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")

op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes,
  cstar = 0,
  col = myCol2,
  label = NULL,
  mstree = FALSE,
  legend = TRUE,
  posi.da = "bottomleft",
  cex = 1.0,
  cex.lab = 0.5,
  cex.main = 0.5,
  cellipse = TRUE,
  posi.leg = "topleft",
  xax = 1,
  yax = 4
)

9. DAPC for all Europe

Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)

# Write the list of European population (family) codes used by --keep-fam.
# The empty line before EOF reproduces the trailing blank line that the
# original echo "..." form wrote at the end of the file.
cat > euro_global/output/neuroadmixture/europe_all.txt <<'EOF'
FRS
STS
POP
POL
SPB
SPS
SPC
BAR
SPM
IMP
ITG
BRE
DES
TRE
ITB
CES
ROM
ITR
SIC
ITP
MAL
SLO
CRO
ALV
ALD
TIR
SER
GRA
GRC
ROS
BUL
TUA
TUH
SEV
ALU
KER
KRA
SOC
TIK
RAR
GES
ARM

EOF

Create files

# Work from the euro_global project directory
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global

# Subset the r2_0.01_b SNP set to the families listed in europe_all.txt
# and write a new binary fileset for the DAPC
plink \
  --bfile output/snps_sets/r2_0.01_b \
  --keep-fam output/neuroadmixture/europe_all.txt \
  --allow-extra-chr \
  --keep-allele-order \
  --make-bed \
  --out output/dapc/MAF_1/dapc_europe_MAF01 \
  --silent

# Summarise the run from the PLINK log
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_europe_MAF01.log

22642 variants loaded from .bim file.
--keep-fam: 410 people remaining.
Total genotyping rate in remaining samples is 0.970962.
22642 variants and 410 people pass filters and QC.

Convert to raw format

# Recode the subset as a .raw file (--recodeA) for import into R.
# The same --out prefix is reused, so this run's log overwrites the log of
# the --make-bed step.
plink \
  --bfile output/dapc/MAF_1/dapc_europe_MAF01 \
  --allow-extra-chr \
  --keep-allele-order \
  --recodeA \
  --out output/dapc/MAF_1/dapc_europe_MAF01 \
  --silent

# Summarise the run from the PLINK log
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_europe_MAF01.log

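22642 variants loaded from .bim file. 22642 variants and 287 people pass filters and QC. (Note: this pasted log reports 287 people, whereas the previous step kept 410 and R reads 410 individuals from the .raw file below; the log text appears to be stale, so re-run to confirm.)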

Clean env & memory

# Empty the workspace so objects from the previous dataset cannot leak
# into the analysis that follows
rm(list = ls(all.names = FALSE))

# Trigger garbage collection and print the memory usage summary
gc()
##            used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells  5971456 319.0   16401712  876.0  24799007 1324.5
## Vcells 10193610  77.8  177936365 1357.6 848466578 6473.3

Import the data and convert it to genind format

# Import the PLINK .raw file written by the --recodeA step.
# The console output below shows it is read into a genlight object.
# requireNamespace() only tests whether the parallel package is available;
# unlike require(), it does not attach the package to the search path.
snp <-
  read.PLINK(
    here("euro_global/output/dapc/MAF_1/dapc_europe_MAF01.raw"),
    quiet = FALSE,
    chunkSize = 1000,
    parallel = requireNamespace("parallel", quietly = TRUE),
    n.cores = 4
  )
## 
##  Reading PLINK raw format into a genlight object... 
## 
## 
##  Reading loci information... 
## 
##  Reading and converting genotypes... 
## .
##  Building final object... 
## 
## ...done.
nInd(snp)
## [1] 410
nLoc(snp)
## [1] 22642
nPop(snp)
## [1] 41
indNames(snp)
##   [1] "1065" "1066" "1067" "1068" "1069" "1070" "1071" "1072" "1073" "1074"
##  [11] "1075" "1076" "1077" "1078" "1079" "1080" "1081" "1082" "1083" "1084"
##  [21] "1085" "1086" "1087" "1088" "1109" "1110" "1111" "1112" "1113" "1114"
##  [31] "1115" "1116" "1117" "1118" "1119" "1120" "1121" "1122" "1123" "1124"
##  [41] "1125" "1126" "1127" "1128" "1129" "1130" "1131" "1132" "1133" "1134"
##  [51] "1135" "1136" "1137" "1138" "1139" "1140" "1141" "1142" "1143" "1144"
##  [61] "1145" "1146" "1147" "1148" "1149" "1150" "1151" "1152" "1153" "1154"
##  [71] "1155" "1156" "1157" "1158" "1159" "1160" "1185" "1186" "1187" "1188"
##  [81] "1189" "1190" "1191" "1192" "1193" "1194" "1195" "1201" "1202" "1203"
##  [91] "1204" "1205" "1206" "1207" "1208" "1209" "1210" "1211" "1212" "1213"
## [101] "1218" "1219" "1220" "1221" "1222" "1223" "1224" "1225" "1226" "1227"
## [111] "1228" "1229" "1230" "1232" "1233" "1234" "1237" "1287" "1288" "1289"
## [121] "1292" "1293" "1294" "1295" "1426" "1427" "1428" "1429" "1430" "1431"
## [131] "1432" "1433" "1434" "1435" "1436" "1437" "1438" "1439" "1440" "1441"
## [141] "1443" "1444" "1446" "1447" "1449" "1451" "1452" "1454" "1456" "1458"
## [151] "1460" "1461" "159"  "162"  "167"  "169"  "170"  "171"  "172"  "173" 
## [161] "193"  "194"  "195"  "196"  "197"  "198"  "199"  "200"  "201"  "202" 
## [171] "203"  "204"  "2187" "2188" "2189" "2191" "2192" "2193" "2194" "2195"
## [181] "2202" "2215" "2216" "2217" "2218" "279"  "280"  "281"  "282"  "283" 
## [191] "284"  "285"  "286"  "287"  "289"  "290"  "291"  "701"  "702"  "703" 
## [201] "704"  "705"  "706"  "707"  "708"  "709"  "710"  "711"  "712"  "713" 
## [211] "714"  "715"  "716"  "717"  "718"  "719"  "720"  "721"  "722"  "723" 
## [221] "724"  "725"  "726"  "727"  "728"  "729"  "730"  "731"  "732"  "733" 
## [231] "735"  "736"  "737"  "741"  "742"  "743"  "744"  "745"  "746"  "747" 
## [241] "749"  "750"  "751"  "752"  "753"  "754"  "755"  "756"  "757"  "758" 
## [251] "759"  "760"  "761"  "762"  "763"  "764"  "765"  "766"  "769"  "770" 
## [261] "771"  "772"  "773"  "774"  "775"  "776"  "777"  "778"  "781"  "782" 
## [271] "784"  "785"  "786"  "787"  "788"  "789"  "790"  "791"  "792"  "793" 
## [281] "794"  "795"  "801"  "802"  "803"  "804"  "805"  "806"  "807"  "808" 
## [291] "809"  "810"  "811"  "812"  "813"  "814"  "815"  "816"  "817"  "818" 
## [301] "819"  "820"  "821"  "822"  "824"  "825"  "826"  "827"  "829"  "830" 
## [311] "831"  "833"  "834"  "835"  "836"  "837"  "838"  "839"  "840"  "841" 
## [321] "842"  "843"  "844"  "845"  "846"  "847"  "848"  "849"  "850"  "851" 
## [331] "852"  "853"  "854"  "855"  "856"  "857"  "859"  "860"  "861"  "862" 
## [341] "863"  "864"  "865"  "866"  "867"  "868"  "869"  "870"  "871"  "872" 
## [351] "873"  "874"  "875"  "876"  "877"  "878"  "879"  "880"  "881"  "882" 
## [361] "883"  "884"  "885"  "886"  "887"  "888"  "889"  "890"  "891"  "892" 
## [371] "893"  "894"  "901"  "902"  "903"  "904"  "905"  "906"  "907"  "908" 
## [381] "909"  "910"  "911"  "912"  "913"  "915"  "916"  "917"  "918"  "919" 
## [391] "920"  "921"  "922"  "923"  "924"  "925"  "926"  "927"  "928"  "929" 
## [401] "930"  "931"  "932"  "933"  "934"  "935"  "936"  "937"  "938"  "939"
# Convert the genlight object to genind.
# The progress bar is switched off: it does not affect the result and
# otherwise floods the knitted report with hundreds of progress lines.
snp2 <- gl2gi(snp, probar = FALSE, verbose = NULL)
## Starting gl2gi 
##   Processing genlight object with SNP data
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi

Save it

# Write snp2 to disk so later steps can reload it instead of rebuilding it.
saveRDS(
  object = snp2,
  file = here("euro_global/output/dapc/MAF_1/snp2_europe_all.rds")
)

To load it

# Restore snp2 from the file written by the save step.
snp2 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp2_europe_all.rds")
)

9.1 Scale

# Centre and scale the allele data; missing values are replaced by the
# mean of the corresponding allele (NA.method = "mean").
# center/scale are written out explicitly; both are the scaleGen() defaults.
snp3 <- scaleGen(
  snp2,
  center = TRUE,
  scale = TRUE,
  NA.method = "mean"
)
# Check what kind of object scaleGen() returned.
class(snp3)

Save it

# Write the scaled data (snp3) to disk so scaling does not have to be rerun.
saveRDS(
  object = snp3,
  file = here("euro_global/output/dapc/MAF_1/snp3_europe_all.rds")
)

To load it

# Restore the scaled data and report its dimensions.
snp3 <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/snp3_europe_all.rds")
)
dim(snp3)
## [1]   410 45220
# Peek at the top-left 5 x 5 corner of the scaled data.
snp3[seq_len(5), seq_len(5)]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1065        1.0694662       -1.0694662       0.07327617      -0.07327617
## 1066        1.0694662       -1.0694662       1.37950360      -1.37950360
## 1067        1.0694662       -1.0694662       1.37950360      -1.37950360
## 1068       -0.2365789        0.2365789       0.07327617      -0.07327617
## 1069        1.0694662       -1.0694662       1.37950360      -1.37950360
##      AX-583036983_C.A
## 1065        0.1719037
## 1066        1.5336507
## 1067        1.5336507
## 1068        0.1719037
## 1069        1.5336507
# Get the populations from the genlight object
# These labels are used below as the grouping factor for xvalDapc() and dapc().
# NOTE(review): `snp` is created earlier in the file; this assumes its
# individuals are in the same order as the rows of snp3 -- TODO confirm.
populations <- snp$pop

9.2 Find clusters

# Look for genetic clusters in the scaled data, testing up to 20 clusters.
# n.pca and n.clust hard-code the answers previously typed at the
# interactive prompts (400 PCs retained, 5 clusters chosen), so this chunk
# runs unattended with the same settings every time.
# Remove both arguments to get the interactive variance and BIC plots back.
grp <- find.clusters(snp3, max.n.clust = 20, n.pca = 400, n.clust = 5)

Save it

# Write the find.clusters() result to disk.
saveRDS(
  object = grp,
  file = here("euro_global/output/dapc/MAF_1/grp_europe_all.rds")
)

To load it

# Restore the find.clusters() result from disk.
grp <- readRDS(
  file = here("euro_global/output/dapc/MAF_1/grp_europe_all.rds")
)

Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross-validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross-validation. DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation), and then used to predict the groups of the remaining 10% of observations. The current method uses the average prediction success per group (result = "groupMean") or the overall prediction success (result = "overall"). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.

# Cross-validate DAPC on the scaled data, grouping by sampling population.
# Up to 200 PCs are tried; each of the 30 replicates trains on 90% of the
# samples. The result is printed, not stored.
# NOTE(review): no seed is set before this call, so the outcome is presumably
# not reproducible run to run -- confirm whether a set.seed() is wanted.
xvalDapc(
  snp3,
  populations,
  n.pca.max = 200,
  n.da = NULL,
  training.set = 0.9,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  n.pca = NULL,
  n.rep = 30,
  xval.plot = TRUE
)

Cross-validation result: $n.pca: 100 first PCs of PCA used; $n.da: 40 discriminant functions saved; $var (proportion of conserved variance): 0.518

Run dapc using these #s from CV

# First-pass DAPC using the values reported by cross-validation:
# 100 PCs retained and 40 discriminant functions retained.
# Passing n.pca and n.da explicitly skips the interactive prompts, so the
# chunk can be rerun or knitted without manual input.
# `populations` holds snp$pop (assigned earlier) and is the same grouping
# factor used for the final dapc() call.
dapc1 <- dapc(snp3, populations, n.pca = 100, n.da = 40)

Save the DAPC object

# Write the first-pass DAPC result (dapc1) to disk.
saveRDS(
  object = dapc1,
  file = here(
    "scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc1.rds"
  )
)

Load the DAPC object

# Restore the first-pass DAPC result (dapc1) from disk.
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns",
    "output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc1.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Calculate the optimal number of PCs to retain, using the a-score.
# Candidate values span every PC stored in dapc1; seq_len() is used instead
# of 1:ncol(...) so an empty table cannot yield the sequence c(1, 0).
# The returned list is printed; its $best element is the suggested PC count.
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  plot = TRUE,
  n.sim = 20
)

## $pop.score
## $pop.score$`1`
##          ALD          ALU          ALV          ARM          BAR          BRE 
##  0.000000000  0.833333333  0.008333333  0.075000000  0.229166667  0.284615385 
##          BUL          CES          CRO          DES          FRS          GES 
## -0.020000000 -0.132142857 -0.058333333  0.303125000  0.141666667 -0.004166667 
##          GRA          GRC          IMP          ITB          ITP          ITR 
##  0.795454545  0.000000000  0.000000000  0.000000000 -0.022222222  0.225000000 
##          KER          KRA          MAL          POL          POP          RAR 
##  0.500000000  0.083333333 -0.025000000  0.000000000  0.166666667  0.454166667 
##          ROM          ROS          SER          SEV          SIC          SLO 
##  0.000000000 -0.045454545  0.000000000 -0.020833333 -0.022222222  0.166666667 
##          SOC          SPB          SPC          SPM          SPS          STS 
## -0.008333333 -0.031250000  0.000000000  0.000000000  0.000000000  0.250000000 
##          TIK          TIR          TRE          TUA          TUH 
##  0.570833333  0.000000000  0.445833333  0.000000000 -0.020833333 
## 
## $pop.score$`10`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.75000000  0.62500000  0.41666667  0.80000000  0.85833333  0.76538462 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.57500000  0.86071429  0.53750000  0.87812500  0.59583333  0.22500000 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.91363636  0.53000000 -0.08750000  0.15000000  0.68888889  0.72083333 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.39166667  0.07916667  0.89166667  0.37500000  0.06250000 -0.08750000 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.37500000  0.74090909  0.90000000  0.75000000  0.40555556  0.86666667 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.26250000  0.83750000  0.58333333 -0.14000000  0.80000000  0.66666667 
##         TIK         TIR         TRE         TUA         TUH 
##  0.66666667  0.10000000  0.48333333  0.23333333  0.42083333 
## 
## $pop.score$`20`
##          ALD          ALU          ALV          ARM          BAR          BRE 
##  0.675000000  0.816666667  0.354166667  0.885000000  0.900000000  0.738461538 
##          BUL          CES          CRO          DES          FRS          GES 
##  0.665000000  0.871428571  0.625000000  0.815625000  0.700000000  0.525000000 
##          GRA          GRC          IMP          ITB          ITP          ITR 
##  0.868181818  0.640000000  0.250000000  0.530000000  0.855555556  0.687500000 
##          KER          KRA          MAL          POL          POP          RAR 
##  0.625000000 -0.004166667  0.712500000  0.475000000  0.500000000  0.341666667 
##          ROM          ROS          SER          SEV          SIC          SLO 
##  0.675000000  0.868181818  0.712500000  0.825000000  0.483333333  0.862500000 
##          SOC          SPB          SPC          SPM          SPS          STS 
##  0.354166667  0.656250000  0.300000000 -0.210000000  0.693750000  0.750000000 
##          TIK          TIR          TRE          TUA          TUH 
##  0.508333333  0.287500000  0.604166667  0.083333333  0.625000000 
## 
## $pop.score$`30`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.58000000  0.61666667  0.46250000  0.76000000  0.78333333  0.71923077 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.65500000  0.83571429  0.55000000  0.78437500  0.64583333  0.46250000 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.77727273  0.54000000  0.01250000  0.42000000  0.68888889  0.55000000 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.76250000  0.03333333  0.62083333  0.40000000  0.37500000  0.55000000 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.55000000  0.78181818  0.51250000  0.80833333  0.41666667  0.79166667 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.31666667  0.62500000  0.13333333 -0.38000000  0.63125000  0.72916667 
##         TIK         TIR         TRE         TUA         TUH 
##  0.66250000  0.16250000  0.65833333  0.19444444  0.79583333 
## 
## $pop.score$`40`
##        ALD        ALU        ALV        ARM        BAR        BRE        BUL 
##  0.4750000  0.7083333  0.3041667  0.7350000  0.7416667  0.6307692  0.7200000 
##        CES        CRO        DES        FRS        GES        GRA        GRC 
##  0.7714286  0.6500000  0.7468750  0.5333333  0.3083333  0.7227273  0.5450000 
##        IMP        ITB        ITP        ITR        KER        KRA        MAL 
## -0.0750000  0.3000000  0.6277778  0.4958333  0.6583333  0.0750000  0.6708333 
##        POL        POP        RAR        ROM        ROS        SER        SEV 
##  0.1000000  0.3916667  0.6750000  0.4375000  0.6909091  0.3625000  0.7291667 
##        SIC        SLO        SOC        SPB        SPC        SPM        SPS 
##  0.3277778  0.7083333  0.2875000  0.5000000  0.0250000 -0.5200000  0.6312500 
##        STS        TIK        TIR        TRE        TUA        TUH 
##  0.6208333  0.5583333 -0.0750000  0.5541667  0.3166667  0.6666667 
## 
## $pop.score$`50`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.40500000  0.69583333  0.31666667  0.65000000  0.67916667  0.65769231 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.57000000  0.65000000  0.52500000  0.71875000  0.57083333  0.22083333 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.63181818  0.38000000 -0.20000000  0.17000000  0.55555556  0.56666667 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.55000000  0.14166667  0.64166667  0.15000000  0.42916667  0.60416667 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.32500000  0.65909091  0.36250000  0.65833333  0.36666667  0.62083333 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.48750000  0.42500000 -0.01666667 -0.67000000  0.58125000  0.58750000 
##         TIK         TIR         TRE         TUA         TUH 
##  0.57916667  0.08750000  0.54583333  0.12222222  0.63750000 
## 
## $pop.score$`60`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.41500000  0.60000000  0.28333333  0.50500000  0.56250000  0.61538462 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.59000000  0.62142857  0.59166667  0.66250000  0.57083333  0.25000000 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.57272727  0.35500000 -0.32500000  0.22000000  0.54444444  0.38750000 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.48333333  0.26666667  0.58333333  0.00000000  0.40416667  0.49583333 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.20000000  0.55909091  0.23750000  0.57083333  0.39444444  0.49166667 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.51666667  0.31875000  0.06666667 -0.69000000  0.47500000  0.48750000 
##         TIK         TIR         TRE         TUA         TUH 
##  0.56666667  0.06250000  0.57500000  0.01666667  0.47916667 
## 
## $pop.score$`70`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.34000000  0.50416667  0.34583333  0.50000000  0.48750000  0.53076923 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.47500000  0.53571429  0.47500000  0.55625000  0.50833333  0.16250000 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.42727273  0.22500000 -0.32500000  0.20000000  0.43333333  0.45416667 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.47916667  0.27083333  0.42916667  0.02500000  0.42083333  0.54166667 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.21250000  0.47272727  0.05000000  0.50416667  0.23333333  0.55416667 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.42500000  0.27500000 -0.04166667 -0.80000000  0.34375000  0.50833333 
##         TIK         TIR         TRE         TUA         TUH 
##  0.53333333  0.20000000  0.47916667  0.10000000  0.37916667 
## 
## $pop.score$`80`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.25500000  0.44583333  0.24166667  0.40000000  0.47500000  0.44230769 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.31500000  0.50714286  0.42916667  0.53125000  0.42083333  0.31666667 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.38636364  0.29000000 -0.17500000  0.11000000  0.26111111  0.20833333 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.31666667  0.20416667  0.32916667  0.00000000  0.37083333  0.43333333 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.11250000  0.38181818  0.10000000  0.43750000  0.26666667  0.43750000 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.32083333  0.11875000 -0.13333333 -0.70000000  0.27500000  0.50833333 
##         TIK         TIR         TRE         TUA         TUH 
##  0.40000000  0.10000000  0.45000000  0.02777778  0.31250000 
## 
## $pop.score$`90`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.19500000  0.40000000  0.22500000  0.31000000  0.34166667  0.42692308 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.33500000  0.39642857  0.35000000  0.44375000  0.37916667  0.17916667 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.28636364  0.21500000 -0.21250000  0.11000000  0.27777778  0.24583333 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.33750000  0.13750000  0.27083333  0.00000000  0.27916667  0.34583333 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.06250000  0.36363636  0.05000000  0.37916667  0.24444444  0.34166667 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.39583333  0.05000000 -0.20833333 -0.73000000  0.09375000  0.30416667 
##         TIK         TIR         TRE         TUA         TUH 
##  0.34583333  0.05000000  0.31250000 -0.06666667  0.29166667 
## 
## $pop.score$`100`
##         ALD         ALU         ALV         ARM         BAR         BRE 
##  0.07000000  0.28750000  0.15833333  0.22000000  0.24166667  0.35000000 
##         BUL         CES         CRO         DES         FRS         GES 
##  0.19500000  0.31071429  0.32500000  0.38125000  0.30416667  0.07083333 
##         GRA         GRC         IMP         ITB         ITP         ITR 
##  0.33181818  0.15500000 -0.21250000  0.05000000  0.20555556  0.17083333 
##         KER         KRA         MAL         POL         POP         RAR 
##  0.34166667  0.13333333  0.30416667  0.02500000  0.14166667  0.25833333 
##         ROM         ROS         SER         SEV         SIC         SLO 
##  0.06250000  0.26363636  0.01250000  0.31666667  0.18333333  0.31250000 
##         SOC         SPB         SPC         SPM         SPS         STS 
##  0.29166667  0.05625000 -0.10000000 -0.73000000  0.03125000  0.22500000 
##         TIK         TIR         TRE         TUA         TUH 
##  0.33333333  0.03750000  0.34166667 -0.03333333  0.21666667 
## 
## 
## $mean
##         1        10        20        30        40        50        60        70 
## 0.1249367 0.5236150 0.5885756 0.5359877 0.4715532 0.4302369 0.3800920 0.3275972 
##        80        90       100 
## 0.2666021 0.2086725 0.1619628 
## 
## $pred
## $pred$x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100
## 
## $pred$y
##   [1] 0.1249367 0.1779653 0.2303467 0.2814228 0.3305355 0.3770268 0.4202384
##   [8] 0.4595124 0.4941906 0.5236150 0.5473318 0.5657054 0.5793043 0.5886973
##  [15] 0.5944530 0.5971401 0.5973272 0.5955831 0.5924763 0.5885756 0.5843546
##  [22] 0.5799069 0.5752310 0.5703254 0.5651887 0.5598195 0.5542162 0.5483774
##  [29] 0.5423018 0.5359877 0.5294459 0.5227356 0.5159282 0.5090951 0.5023076
##  [36] 0.4956372 0.4891551 0.4829329 0.4770418 0.4715532 0.4665143 0.4618750
##  [43] 0.4575611 0.4534983 0.4496124 0.4458291 0.4420742 0.4382733 0.4343523
##  [50] 0.4302369 0.4258708 0.4212700 0.4164685 0.4115003 0.4063992 0.4011994
##  [57] 0.3959348 0.3906394 0.3853471 0.3800920 0.3748985 0.3697533 0.3646337
##  [64] 0.3595168 0.3543799 0.3492002 0.3439548 0.3386210 0.3331761 0.3275972
##  [71] 0.3218686 0.3160036 0.3100223 0.3039451 0.2977922 0.2915839 0.2853405
##  [78] 0.2790822 0.2728293 0.2666021 0.2604194 0.2542936 0.2482361 0.2422577
##  [85] 0.2363697 0.2305831 0.2249089 0.2193584 0.2139426 0.2086725 0.2035549
##  [92] 0.1985784 0.1937273 0.1889860 0.1843387 0.1797698 0.1752634 0.1708039
##  [99] 0.1663756 0.1619628
## 
## 
## $best
## [1] 17
#Optimal number = 17

Run DAPC with object

# Fit the DAPC on snp3 with the a-score optimum found above (17 PCs), keeping
# 6 discriminant functions; `populations` gives the group of each sample.
dapc_snp <- dapc(
  snp3,
  grp = populations,
  n.pca = 17,
  n.da = 6
)

Save it

# Persist the fitted DAPC so it can be reloaded without refitting.
saveRDS(
  object = dapc_snp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp.rds"
  )
)

To load it

# Reload the fitted DAPC from disk.
dapc_snp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp.rds"
  )
)

# Colour palette passed to scatter() as `col`.
# NOTE(review): 39 colours are listed; scatter() recycles `col` when there are
# more groups than colours -- confirm against nlevels(dapc_snp$grp).
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0",
  "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4",
  "#cf749b", "#FF7F00", "#2524f9", "#799d10", "#a7e831", "#984EA3",
  "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624",
  "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5",
  "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3",
  "magenta", "gold4", "purple"
)

9.3 Plot with new colors - by region

PCs 1 & 2 (distinct colour per group, palette `myCol`)

# Point symbols handed to scatter(); shared by the PDF and on-screen versions.
good.shapes <- c(1:25, 35:38, 1:25)

# PDF copy of the scatter (DAPC axes 1 and 2). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_europe_all_PC1_2.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 2,
  pch = good.shapes, col = myCol, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 2,
  pch = good.shapes, col = myCol, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 3

# Point symbols handed to scatter(); shared by the PDF and on-screen versions.
good.shapes <- c(1:25, 35:38, 1:25)

# PDF copy of the scatter (DAPC axes 1 and 3). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_europe_all_PC1_3.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 3,
  pch = good.shapes, col = myCol, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 3,
  pch = good.shapes, col = myCol, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 4

# Point symbols handed to scatter(); shared by the PDF and on-screen versions.
good.shapes <- c(1:25, 35:38, 1:25)

# PDF copy of the scatter (DAPC axes 1 and 4). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_europe_all_PC1_4.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 4,
  pch = good.shapes, col = myCol, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 4,
  pch = good.shapes, col = myCol, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 2 (coloured by region, palette `myCol2`)

# Point symbols and colours handed to scatter(); defined once and shared by
# the PDF and on-screen versions.
good.shapes <- c(1:20, 2, 35:38, 1, 3, 5:8, 9:14, 19:20, 6, 11)

# NOTE(review): 41 entries, presumably one per group of the DAPC in factor
# level order, with groups of the same region sharing a colour -- confirm.
myCol2 <- c(
  "#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta",
  "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod",
  "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2",
  "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00",
  "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# PDF copy of the scatter (DAPC axes 1 and 2). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_europe_all_PC1_2_region.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 2,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 2,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 3

# Point symbols and colours handed to scatter(); defined once and shared by
# the PDF and on-screen versions.
good.shapes <- c(1:20, 2, 35:38, 1, 3, 5:8, 9:14, 19:20, 6, 11)

# NOTE(review): 41 entries, presumably one per group of the DAPC in factor
# level order, with groups of the same region sharing a colour -- confirm.
myCol2 <- c(
  "#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta",
  "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod",
  "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2",
  "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00",
  "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# PDF copy of the scatter (DAPC axes 1 and 3). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_europe_all_PC1_3_region.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 3,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 3,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 4

# Point symbols and colours handed to scatter(); defined once and shared by
# the PDF and on-screen versions.
good.shapes <- c(1:20, 2, 35:38, 1, 3, 5:8, 9:14, 19:20, 6, 11)

# NOTE(review): 41 entries, presumably one per group of the DAPC in factor
# level order, with groups of the same region sharing a colour -- confirm.
myCol2 <- c(
  "#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta",
  "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod",
  "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2",
  "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00",
  "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# PDF copy of the scatter (DAPC axes 1 and 4). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_europe_all_PC1_4_region.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 4,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 4,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 5

# Point symbols and colours handed to scatter(); defined once and shared by
# the PDF and on-screen versions.
good.shapes <- c(1:20, 2, 35:38, 1, 3, 5:8, 9:14, 19:20, 6, 11)

# NOTE(review): 41 entries, presumably one per group of the DAPC in factor
# level order, with groups of the same region sharing a colour -- confirm.
myCol2 <- c(
  "#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta",
  "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod",
  "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2",
  "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00",
  "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# PDF copy of the scatter (DAPC axes 1 and 5). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_europe_all_PC1_5_region.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 5,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 5,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

PCs 1 & 6

# Point symbols and colours handed to scatter(); defined once and shared by
# the PDF and on-screen versions.
good.shapes <- c(1:20, 2, 35:38, 1, 3, 5:8, 9:14, 19:20, 6, 11)

# NOTE(review): 41 entries, presumably one per group of the DAPC in factor
# level order, with groups of the same region sharing a colour -- confirm.
myCol2 <- c(
  "#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta",
  "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod",
  "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2",
  "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00",
  "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# PDF copy of the scatter (DAPC axes 1 and 6). The path is built with here()
# so it resolves from the project root, like the saveRDS()/readRDS() calls.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_europe_all_PC1_6_region.pdf"
  ),
  width = 7,  # inches
  height = 7  # inches
)
par(cex = 0.39)  # applies to the PDF device only; gone once it is closed
scatter(
  dapc_snp, xax = 1, yax = 6,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
dev.off()

# Same figure on the active device; par() is restored afterwards so the
# reduced cex does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp, xax = 1, yax = 6,
  pch = good.shapes, col = myCol2, cstar = 0, cellipse = TRUE,
  label = NULL, mstree = FALSE, legend = TRUE,
  posi.da = "bottomleft", posi.leg = "topleft",
  cex = 1.0, cex.lab = 0.5, cex.main = 0.5
)
par(op)

9.4 Plot grouped by country (instead of pop)

Import Sample Locations

# Read the sampling-site table from CSV, cache it as RDS, then reload it from
# the cache and show the first rows.
sampling_loc <- read.csv(
  file = here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sampling_loc_europe_all.csv")
)
saveRDS(
  object = sampling_loc,
  file = here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sampling_loc_europe_all.rds")
)
sampling_loc <- readRDS(
  file = here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sampling_loc_europe_all.rds")
)
head(sampling_loc)
##    Pop_City  Country Latitude Longitude Continent Abbreviation Year
## 1     Vlore  Albania 40.46600  19.48970    Europe          ALV 2020
## 2    Durres  Albania 41.29704  19.50373    Europe          ALD 2018
## 3    Tirana  Albania 41.31473  19.83172    Europe          TIR 2017
## 4    Ijevan  Armenia 40.87971  45.14764    Europe          ARM 2020
## 5       Lom Bulgaria 43.80489  23.23634    Europe          BUL 2019
## 6 Dubrovnik  Croatia 42.60654  18.22661    Europe          CRO 2017
##            Region   Subregion order order2 orderold
## 1 Southern Europe East Europe    32     24       24
## 2 Southern Europe East Europe    33     25       25
## 3 Southern Europe East Europe    34     26       26
## 4  Eastern Europe East Europe    50     42       42
## 5  Eastern Europe East Europe    39     31       31
## 6 Southern Europe East Europe    31     23       23
# Use the contents of the genind `other` slot as the strata table.
# NOTE(review): assumes other(snp2) converts to one row per individual -- confirm.
strata(snp2) <- data.frame(other(snp2))

# pop() is currently set to the sampling-site codes (41 levels, see output)
head(pop(snp2)) 
## [1] SOC SOC SOC SOC SOC SOC
## 41 Levels: ALD ALU ALV ARM BAR BRE BUL CES CRO DES FRS GES GRA GRC IMP ... TUH

Load the csv

# Per-sample lookup table with the sampling-site code (`pop`) and `country`.
countr <- read.csv(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "DAPC_europe_all.csv"
  )
)
df <- as.data.frame(countr)

head(df)
##   pop country
## 1 SOC  Russia
## 2 SOC  Russia
## 3 SOC  Russia
## 4 SOC  Russia
## 5 SOC  Russia
## 6 SOC  Russia
# Relabel every individual by country instead of sampling site.
# NOTE(review): rows of `df` are assumed to be in the same order as the
# individuals in `snp2` -- confirm against the CSV.
stopifnot(nrow(df) == nInd(snp2))
# Use the pop<- accessor rather than writing the slot directly: it checks the
# length of the replacement and builds the factor itself.
pop(snp2) <- df$country
pop(snp2)
##   [1] Russia       Russia       Russia       Russia       Russia      
##   [6] Russia       Russia       Russia       Russia       Russia      
##  [11] Russia       Russia       Ukraine      Ukraine      Ukraine     
##  [16] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
##  [21] Ukraine      Ukraine      Ukraine      Ukraine      Georgia     
##  [26] Georgia      Georgia      Georgia      Georgia      Georgia     
##  [31] Georgia      Georgia      Georgia      Georgia      Georgia     
##  [36] Georgia      Ukraine      Ukraine      Ukraine      Ukraine     
##  [41] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
##  [46] Ukraine      Ukraine      Ukraine      Russia       Russia      
##  [51] Russia       Russia       Russia       Russia       Russia      
##  [56] Russia       Russia       Russia       Russia       Russia      
##  [61] Russia       Russia       Russia       Russia       Russia      
##  [66] Russia       Russia       Russia       Russia       Russia      
##  [71] Russia       Russia       Russia       Russia       Russia      
##  [76] Russia       Italy        Italy        Italy        Italy       
##  [81] Italy        Italy        Italy        Italy        Italy       
##  [86] Italy        Italy        Italy        Ukraine      Ukraine     
##  [91] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
##  [96] Ukraine      Ukraine      Ukraine      Ukraine      Ukraine     
## [101] France       France       France       France       France      
## [106] France       France       France       Italy        Italy       
## [111] Italy        Italy        Italy        Italy        Italy       
## [116] Italy        Italy        Italy (1995) Italy (1995) Italy (1995)
## [121] Italy (1995) Italy (1995) Italy (1995) Italy (1995) France      
## [126] France       France       France       Italy (1995) Italy (1995)
## [131] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [136] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [141] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [146] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [151] Italy (1995) Italy (1995) Russia       Russia       Russia      
## [156] Russia       Russia       Russia       Russia       Russia      
## [161] Albania      Albania      Albania      Albania      Italy       
## [166] Italy        Italy        Italy        Italy        Italy       
## [171] Italy        Italy        Italy (1995) Italy (1995) Italy (1995)
## [176] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [181] Greece       Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [186] Spain        Spain        Spain        Spain        Spain       
## [191] Spain        Spain        Spain        Spain        Spain       
## [196] Spain        Spain        Bulgaria     Bulgaria     Bulgaria    
## [201] Bulgaria     Bulgaria     Bulgaria     Bulgaria     Bulgaria    
## [206] Bulgaria     Bulgaria     Croatia      Croatia      Croatia     
## [211] Croatia      Croatia      Croatia      Croatia      Croatia     
## [216] Croatia      Croatia      Croatia      Croatia      Greece      
## [221] Greece       Greece       Greece       Greece       Greece      
## [226] Greece       Greece       Greece       Greece       Greece      
## [231] Greece       Greece       Greece       Greece       Greece      
## [236] Greece       Greece       Greece       Greece       Italy       
## [241] Italy        Italy        Italy        Italy        Malta       
## [246] Malta        Malta        Malta        Malta        Malta       
## [251] Malta        Malta        Malta        Malta        Malta       
## [256] Malta        Spain        Spain        Spain        Spain       
## [261] Spain        Turkey       Turkey       Turkey       Turkey      
## [266] Turkey       Turkey       Turkey       Turkey       Turkey      
## [271] Turkey       Turkey       Turkey       Turkey       Turkey      
## [276] Turkey       Turkey       Turkey       Turkey       Turkey      
## [281] Turkey       Turkey       Albania      Albania      Albania     
## [286] Albania      Albania      Albania      Albania      Albania     
## [291] Albania      Albania      France       France       France      
## [296] France       France       France       France       France      
## [301] France       France       France       France       Italy       
## [306] Italy        Italy        Italy        Italy        Italy       
## [311] Italy        Italy        Italy        Portugal     Portugal    
## [316] Portugal     Portugal     Portugal     Portugal     Portugal    
## [321] Portugal     Portugal     Portugal     Portugal     Portugal    
## [326] Portugal     Portugal     Romania      Romania      Romania     
## [331] Romania      Romania      Romania      Romania      Romania     
## [336] Romania      Romania      Romania      Serbia       Serbia      
## [341] Serbia       Serbia       Slovenia     Slovenia     Slovenia    
## [346] Slovenia     Slovenia     Slovenia     Slovenia     Slovenia    
## [351] Slovenia     Slovenia     Slovenia     Slovenia     Spain       
## [356] Spain        Spain        Spain        Spain        Spain       
## [361] Spain        Spain        Spain        Spain        Spain       
## [366] Spain        Spain        Spain        Spain        Spain       
## [371] Spain        Spain        Armenia      Armenia      Armenia     
## [376] Armenia      Armenia      Armenia      Armenia      Armenia     
## [381] Armenia      Armenia      Spain        Spain        Spain       
## [386] Spain        Albania      Albania      Albania      Albania     
## [391] Albania      Albania      Albania      Albania      Albania     
## [396] Albania      Albania      Albania      Italy        Italy       
## [401] Italy        Italy        Italy        Italy        Italy       
## [406] Italy        Italy        Italy        Italy        Italy       
## 18 Levels: Albania Armenia Bulgaria Croatia France Georgia Greece ... Ukraine

Save the genind object

# Store the genind object whose pop() is now the country.
saveRDS(
  object = snp2,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "snp_country.rds"
  )
)

Load the genind object

# Reload the country-labelled genind object under a new name.
snp_country <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "snp_country.rds"
  )
)

Scale

# Scale the allele table, replacing missing values by the allele mean
# (NA.method = "mean"). From here on `snp_country` is a plain numeric matrix,
# no longer a genind object (see class() below); alleles with a null scaling
# value are dropped, as the warning reports.
snp_country <- scaleGen(snp_country, NA.method="mean")
## Warning in .local(x, ...): Some scaling values are null.
##  Corresponding alleles are removed.
class(snp_country)
## [1] "matrix" "array"
dim(snp_country)
## [1]   410 45220
snp_country[1:5,1:5]
##      AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1065        1.0694662       -1.0694662       0.07327617      -0.07327617
## 1066        1.0694662       -1.0694662       1.37950360      -1.37950360
## 1067        1.0694662       -1.0694662       1.37950360      -1.37950360
## 1068       -0.2365789        0.2365789       0.07327617      -0.07327617
## 1069        1.0694662       -1.0694662       1.37950360      -1.37950360
##      AX-583036983_C.A
## 1065        0.1719037
## 1066        1.5336507
## 1067        1.5336507
## 1068        0.1719037
## 1069        1.5336507
# Group labels for the analyses below, taken from snp2 (saved above as a
# genind object, not a genlight). pop(snp2) was replaced by the country, so
# `populations` now holds country names.
populations <- snp2$pop

9.4.1 Find clusters

# K-means clustering on the scaled allele matrix. The number of retained PCs
# and the number of clusters are passed explicitly (the values previously
# typed at the interactive prompts), so this step can run unattended, e.g.
# when knitting. To re-inspect the BIC curve, drop n.pca/n.clust and run it
# interactively.
# NOTE(review): k-means uses random starts; call set.seed() beforehand if the
# cluster numbering must be exactly reproducible.
grp <- find.clusters(
  snp_country,
  max.n.clust = 12,
  n.pca = 350,  # retained 350
  n.clust = 5   # number of clusters chosen at the prompt (>=2): 5
)

Save the clustering result (output of find.clusters)

# Store the find.clusters() result.
saveRDS(
  object = grp,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "grp_countries.rds"
  )
)

Load the clustering result

# Reload the clustering result, then cross-tabulate country (rows) against
# the inferred cluster (columns).
grp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "grp_countries.rds"
  )
)
table(pop(snp2), grp$grp)
##               
##                 1  2  3  4  5
##   Albania       0 26  0  0  0
##   Armenia       0  0 10  0  0
##   Bulgaria     10  0  0  0  0
##   Croatia       0 12  0  0  0
##   France       24  0  0  0  0
##   Georgia       0  0 12  0  0
##   Greece        0 21  0  0  0
##   Italy        55  0  0  0  0
##   Italy (1995)  0  0  0  0 43
##   Malta        12  0  0  0  0
##   Portugal     14  0  0  0  0
##   Romania      11  0  0  0  0
##   Russia        0  0 48  0  0
##   Serbia        4  0  0  0  0
##   Slovenia     12  0  0  0  0
##   Spain         0  0  0 22 17
##   Turkey        3  0  0 18  0
##   Ukraine       0  0 36  0  0

Cross-validation

# Cross-validate the number of retained PCs (up to 200) with 30 replicates,
# each trained on 90% of the samples. Note that snp_country was already scaled
# by scaleGen() and scale = FALSE is passed here.
xvalDapc(
  x = snp_country,
  grp = populations,
  n.pca.max = 200,
  n.pca = NULL,
  n.da = NULL,
  training.set = 0.9,
  n.rep = 30,
  result = c("groupMean", "overall"),
  center = TRUE,
  scale = FALSE,
  xval.plot = TRUE
)

Cross-validation result: `$n.pca` = 120 (first PCs of the PCA used); `$n.da` = 16 (discriminant functions saved); `$var` (proportion of conserved variance) = 0.571.

Run DAPC using these numbers from cross-validation (120 PCs, 16 discriminant functions)

# DAPC by country with the values selected by cross-validation. They are
# passed explicitly instead of being typed at the interactive prompts, so the
# step can run unattended (e.g. when knitting).
dapc1 <- dapc(
  snp_country,
  populations,
  n.pca = 120,  # 120 PCs retained
  n.da = 16     # 16 discriminant functions retained
)

Save the DAPC object

# Store the cross-validated DAPC fit.
saveRDS(
  object = dapc1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc1_country.rds"
  )
)

Load the DAPC object

# Reload the cross-validated DAPC fit.
dapc1 <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc1_country.rds"
  )
)

Calculating the optimum PC number to rerun DAPC

# Calculate the optimal number of retained PCs from the a-score, searching
# over every PC kept in dapc1 (20 simulations per evaluated value).
# seq_len() is used instead of 1:ncol() so an empty table cannot yield c(1, 0).
optim.a.score(
  dapc1,
  n.pca = seq_len(ncol(dapc1$tab)),
  smart = TRUE,
  n = 10,
  n.sim = 20,
  plot = TRUE
)

## $pop.score
## $pop.score$`1`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##   0.00000000   0.60000000   0.00000000   0.00000000   0.00000000   0.00000000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##   0.23809524   0.19727273   0.88372093   0.00000000   0.00000000   0.00000000 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##   0.70937500   0.00000000   0.00000000  -0.02692308   0.00000000   0.22083333 
## 
## $pop.score$`10`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.7942308    0.8400000    0.5300000    0.5916667    0.7145833    0.1750000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.8880952    0.1345455    0.8174419    0.2666667    0.2750000    0.2045455 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.3687500    0.8375000    0.6000000    0.3820513    0.7166667    0.4222222 
## 
## $pop.score$`20`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.8115385    0.8200000    0.6200000    0.5708333    0.8604167    0.4166667 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.9190476    0.3890909    0.7581395    0.4291667    0.7142857    0.8772727 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.3864583    0.6500000    0.8458333    0.7115385    0.8333333    0.4472222 
## 
## $pop.score$`30`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.7153846    0.7200000    0.6200000    0.5500000    0.7354167    0.4250000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.8214286    0.5163636    0.7127907    0.4541667    0.5535714    0.7772727 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.4531250    0.4500000    0.7833333    0.6833333    0.7761905    0.5069444 
## 
## $pop.score$`40`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.6480769    0.6500000    0.6650000    0.5791667    0.6645833    0.2541667 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.7380952    0.5545455    0.7151163    0.5541667    0.5500000    0.6363636 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.4739583    0.4750000    0.6500000    0.6038462    0.6976190    0.4750000 
## 
## $pop.score$`50`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.6115385    0.6050000    0.4900000    0.5541667    0.6625000    0.3416667 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.7142857    0.5863636    0.6779070    0.4291667    0.3642857    0.5772727 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.4625000    0.3375000    0.6416667    0.6217949    0.5333333    0.6208333 
## 
## $pop.score$`60`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.6442308    0.4600000    0.4950000    0.5125000    0.5562500    0.2500000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.6142857    0.5772727    0.6151163    0.3125000    0.4357143    0.4681818 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.4322917    0.1750000    0.5583333    0.5576923    0.4952381    0.6069444 
## 
## $pop.score$`70`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.6384615    0.4100000    0.4250000    0.5041667    0.5229167    0.3958333 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.5976190    0.5136364    0.5988372    0.2583333    0.3678571    0.4045455 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.4406250    0.1125000    0.4500000    0.5217949    0.4357143    0.6027778 
## 
## $pop.score$`80`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.5673077    0.3350000    0.3150000    0.3833333    0.5000000    0.0500000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.5190476    0.5463636    0.5360465    0.1125000    0.5000000    0.3363636 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.3916667    0.1000000    0.3416667    0.4935897    0.4333333    0.5513889 
## 
## $pop.score$`90`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.4865385    0.2600000    0.2300000    0.2541667    0.4104167    0.0875000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.4500000    0.4954545    0.4895349    0.1208333    0.3535714    0.2409091 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.4135417    0.0500000    0.3791667    0.4128205    0.3571429    0.5305556 
## 
## $pop.score$`100`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##   0.43076923   0.14500000   0.21500000   0.27500000   0.43541667  -0.07083333 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##   0.39047619   0.46363636   0.45930233   0.09583333   0.37857143   0.24090909 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##   0.34062500   0.01250000   0.24583333   0.35000000   0.27619048   0.48888889 
## 
## $pop.score$`110`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.3653846    0.1750000    0.1500000    0.2291667    0.3770833   -0.1500000 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.3714286    0.4563636    0.4430233    0.1541667    0.2500000    0.1727273 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.3375000    0.0375000    0.2208333    0.3358974    0.2404762    0.4430556 
## 
## $pop.score$`120`
##      Albania      Armenia     Bulgaria      Croatia       France      Georgia 
##    0.3230769    0.1400000    0.1300000    0.2250000    0.3541667   -0.1458333 
##       Greece        Italy Italy (1995)        Malta     Portugal      Romania 
##    0.2952381    0.4218182    0.4058140    0.1041667    0.2392857    0.1409091 
##       Russia       Serbia     Slovenia        Spain       Turkey      Ukraine 
##    0.2500000    0.0250000    0.1541667    0.3217949    0.2071429    0.3736111 
## 
## 
## $mean
##         1        10        20        30        40        50        60        70 
## 0.1567986 0.5310536 0.6700469 0.6252401 0.5880391 0.5462101 0.4870306 0.4555899 
##        80        90       100       110       120 
## 0.3895893 0.3345640 0.2873955 0.2560893 0.2202976 
## 
## $pred
## $pred$x
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120
## 
## $pred$y
##   [1] 0.1567986 0.2040849 0.2509478 0.2969578 0.3416853 0.3847008 0.4255749
##   [8] 0.4638781 0.4991808 0.5310536 0.5591705 0.5836188 0.6045895 0.6222735
##  [15] 0.6368617 0.6485450 0.6575143 0.6639604 0.6680743 0.6700469 0.6700803
##  [22] 0.6684216 0.6653292 0.6610614 0.6558766 0.6500332 0.6437894 0.6374038
##  [29] 0.6311345 0.6252401 0.6199231 0.6151633 0.6108849 0.6070119 0.6034686
##  [36] 0.6001789 0.5970670 0.5940570 0.5910730 0.5880391 0.5848875 0.5815818
##  [43] 0.5780940 0.5743958 0.5704591 0.5662556 0.5617571 0.5569355 0.5517626
##  [50] 0.5462101 0.5402733 0.5340410 0.5276258 0.5211398 0.5146955 0.5084051
##  [57] 0.5023811 0.4967358 0.4915815 0.4870306 0.4831516 0.4798377 0.4769381
##  [64] 0.4743024 0.4717797 0.4692195 0.4664710 0.4633837 0.4598069 0.4555899
##  [71] 0.4506266 0.4449890 0.4387934 0.4321565 0.4251947 0.4180245 0.4107624
##  [78] 0.4035248 0.3964283 0.3895893 0.3830979 0.3769385 0.3710689 0.3654471
##  [85] 0.3600310 0.3547786 0.3496477 0.3445964 0.3395825 0.3345640 0.3295113
##  [92] 0.3244450 0.3193982 0.3144039 0.3094953 0.3047055 0.3000675 0.2956144
##  [99] 0.2913794 0.2873955 0.2836833 0.2802130 0.2769424 0.2738293 0.2708313
## [106] 0.2679061 0.2650116 0.2621055 0.2591455 0.2560893 0.2529040 0.2495941
## [113] 0.2461736 0.2426563 0.2390561 0.2353868 0.2316625 0.2278969 0.2241040
## [120] 0.2202976
## 
## 
## $best
## [1] 21
#Optimal number = 21

Run DAPC with object

# Fit the DAPC on snp_country, passing `populations` as `grp`.
# n.pca = 21 matches the "Optimal number = 21" noted just above;
# n.da = 6 discriminant functions are retained.
dapc_snp1 <- dapc(
  snp_country,
  n.pca = 21,
  n.da = 6,
  grp = populations
)

Save it

# Write the fitted DAPC object to an RDS file; the path is built with here().
saveRDS(
  object = dapc_snp1,
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country.rds"
  )
)

To load it

# Read the saved DAPC object back from the RDS file written above.
dapc_snp <- readRDS(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country.rds"
  )
)

# Colour palette (39 entries) passed as `col` to the scatter plots below.
myCol <- c(
  "#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0",
  "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4",
  "#cf749b", "#FF7F00", "#2524f9", "#799d10", "#a7e831", "#984EA3",
  "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624",
  "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5",
  "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3",
  "magenta", "gold4", "purple"
)

9.4.2 Plot

9.4.2.1 PCs 1 & 2
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Write the DAPC scatter plot of axes 1 and 2, coloured with myCol, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_2.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 2
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 2
)
par(op)

9.4.2.2 PCs 1 & 3
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Write the DAPC scatter plot of axes 1 and 3, coloured with myCol, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_3.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 3
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 3
)
par(op)

9.4.2.3 PCs 1 & 4
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Write the DAPC scatter plot of axes 1 and 4, coloured with myCol, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_4.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 4
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 4
)
par(op)

Check R symbols for plot

# Preview the candidate plotting symbols, to help choose which ones to use.
# The number of factor levels N is taken from good.shapes so that every level
# has a shape: indexing good.shapes past its length (as with a fixed N = 100)
# yields NA shapes, and ggplot drops those points with a
# "Removed ... rows containing missing values" warning.
good.shapes <- c(1:25, 35:38)
N <- length(good.shapes) # number of distinct symbols shown
M <- 1000 # number of random points drawn
foo <- data.frame(
  x = rnorm(M),
  y = rnorm(M),
  s = factor(sample(seq_len(N), M, replace = TRUE))
)
ggplot(foo, aes(x, y, shape = s)) +
  scale_shape_manual(values = good.shapes) +
  geom_point()
## Warning: Removed 698 rows containing missing values or values outside the scale range
## (`geom_point()`).

#1:25,28:31,36,55:57

9.4.3 Plots with colors by region

9.4.3.1 PCs 1 & 2
# Plotting symbols handed to scatter() as `pch`; the same symbol vector as
# the other DAPC scatter plots in this file.
good.shapes <- c(1:25, 35:38, 1:25)

# Colours by region (18 entries), passed to scatter() as `col`.
myCol2 <- c(
  "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# Write the DAPC scatter plot of axes 1 and 2, coloured with myCol2, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_2_region.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 2
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 2
)
par(op)

9.4.3.2 PCs 1 & 3
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Colours by region (18 entries), passed to scatter() as `col`.
myCol2 <- c(
  "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# Write the DAPC scatter plot of axes 1 and 3, coloured with myCol2, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_3_region.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 3
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 3
)
par(op)

9.4.3.3 PCs 1 & 4
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Colours by region (18 entries), passed to scatter() as `col`.
myCol2 <- c(
  "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# Write the DAPC scatter plot of axes 1 and 4, coloured with myCol2, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_4_region.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 4
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 4
)
par(op)

9.4.3.4 PCs 1 & 5
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Colours by region (18 entries), passed to scatter() as `col`.
myCol2 <- c(
  "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# Write the DAPC scatter plot of axes 1 and 5, coloured with myCol2, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_5_region.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 5
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 5
)
par(op)

9.4.3.5 PCs 1 & 6
# Plotting symbols handed to scatter() as `pch`.
good.shapes <- c(1:25, 35:38, 1:25)

# Colours by region (18 entries), passed to scatter() as `col`.
myCol2 <- c(
  "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod",
  "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2",
  "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod"
)

# Write the DAPC scatter plot of axes 1 and 6, coloured with myCol2, to a
# 7 x 7 inch PDF. The path is built with here(), like the saveRDS()/readRDS()
# calls for this object, so it does not depend on the working directory.
pdf(
  file = here(
    "scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1",
    "europe_all", "dapc_snp_country_euro_all_PC1_6_region.pdf"
  ),
  width = 7, # inches
  height = 7 # inches
)
par(cex = 0.39) # set on the PDF device, which dev.off() closes below
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 6
)
dev.off()

# Draw the same plot on the current graphics device, then restore the
# previous `cex` so the reduced text size does not leak into later plots.
op <- par(cex = 0.39)
scatter(
  dapc_snp,
  pch = good.shapes, cstar = 0, col = myCol2, label = NULL, mstree = FALSE,
  legend = TRUE, posi.da = "bottomleft", cex = 1.0, cex.lab = 0.5,
  cex.main = 0.5, cellipse = TRUE, posi.leg = "topleft", xax = 1, yax = 6
)
par(op)