load libraries
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## here() starts at /gpfs/gibbs/pi/caccone/mkc54/albo
## Loading required package: ggplot2
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: dartR.data
## Registered S3 method overwritten by 'pegas':
## method from
## print.amova ade4
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Registered S3 method overwritten by 'genetics':
## method from
## [.haplotype pegas
## **** Welcome to dartR [Version 2.9.7 ] ****
## Be aware that owing to CRAN requirements and compatibility reasons not all functions of the package may run after the basic installation, as some packages could still be missing. Hence for a most enjoyable experience we recommend to run the function
## gl.install.vanilla.dartR()
## This installs all missing and required packages for your version of dartR. In case something fails during installation please refer to this tutorial: https://github.com/green-striped-gecko/dartR/wiki/Installation-tutorial.
##
## For information how to cite dartR, please use:
## citation('dartR')
## Global verbosity is set to: 2
##
## **** Have fun using dartR! ****
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.1 \
--keep-fam output/fst/pops_4fst.txt \
--make-bed \
--out output/dapc/dapc \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc.log
47484 variants loaded from .bim file. –keep-fam: 407 people remaining. Total genotyping rate in remaining samples is 0.97243. 47484 variants and 407 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/dapc \
--recodeA \
--out output/dapc/dapc \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc.log
47484 variants loaded from .bim file. 47484 variants and 407 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/dapc/dapc.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
## Loading required package: parallel
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save the genind object
Load the genind object
## /// GENIND OBJECT /////////
##
## // 407 individuals; 47,484 loci; 94,968 alleles; size: 174.1 Mb
##
## // Basic content
## @tab: 407 x 94968 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 2-2)
## @loc.fac: locus factor for the 94968 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: df2genind(X = xx[, ], sep = "/", ncode = 1, ind.names = x@ind.names,
## pop = x@pop, NA.char = "-", ploidy = 2)
##
## // Optional content
## @pop: population of each individual (group size range: 4-16)
## @other: a list containing: sex phenotype pat mat
Find clusters
grp <- find.clusters(albo1, max.n.clust=40)
#retained 400 PCs
#Choose the number of clusters (>=2): 3
Save it
To load it
## [1] "Kstat" "stat" "grp" "size"
## [1] 235 66 106
##
## 1 2 3
## ALD 10 0 0
## ALU 0 0 12
## ALV 12 0 0
## ARM 0 0 10
## BAR 12 0 0
## BRE 0 13 0
## BUL 10 0 0
## CES 0 14 0
## CRO 12 0 0
## DES 0 16 0
## FRS 12 0 0
## GES 0 0 12
## GRA 11 0 0
## GRC 10 0 0
## IMP 4 0 0
## ITB 3 2 0
## ITP 8 0 0
## ITR 12 0 0
## KER 0 0 12
## KRA 0 0 12
## MAL 12 0 0
## POP 10 2 0
## RAR 0 0 12
## ROM 4 0 0
## ROS 11 0 0
## SER 4 0 0
## SEV 0 0 12
## SIC 9 0 0
## SLO 12 0 0
## SOC 0 0 12
## SPB 0 8 0
## SPC 0 6 0
## SPM 2 3 0
## SPS 8 0 0
## STS 12 0 0
## TIK 0 0 12
## TIR 4 0 0
## TRE 10 2 0
## TUA 9 0 0
## TUH 12 0 0
table.value(table(pop(albo1), grp$grp), col.lab=paste("inf", 1:3), #inferred groups
row.lab=paste("ori", 1:40)) #original groups
Save it
To load it
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.genind(x = albo1, pop = grp$grp)
##
## $n.pca: 400 first PCs of PCA used
## $n.da: 2 discriminant functions saved
## $var (proportion of conserved variance): 0.998
##
## $eig (eigenvalues): 74050000 2101000 vector length content
## 1 $eig 2 eigenvalues
## 2 $grp 407 prior group assignment
## 3 $prior 3 prior group probabilities
## 4 $assign 407 posterior group assignment
## 5 $pca.cent 94968 centring vector of PCA
## 6 $pca.norm 94968 scaling vector of PCA
## 7 $pca.eig 406 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 407 400 retained PCs of PCA
## 2 $means 3 400 group means
## 3 $loadings 400 2 loadings of variables
## 4 $ind.coord 407 2 coordinates of individuals (principal components)
## 5 $grp.coord 3 2 coordinates of groups
## 6 $posterior 407 3 posterior membership probabilities
## 7 $pca.loadings 94968 400 PCA loadings of original variables
## 8 $var.contr 94968 2 contribution of original variables
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=10) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## 1 2 3
## 0.0000000 0.6212121 0.9528302
##
## $pop.score$`50`
## 1 2 3
## 0.1034043 0.7545455 0.7226415
##
## $pop.score$`100`
## 1 2 3
## 0.1565957 0.5227273 0.4962264
##
## $pop.score$`150`
## 1 2 3
## 0.1429787 0.3712121 0.3962264
##
## $pop.score$`200`
## 1 2 3
## 0.09957447 0.23333333 0.23867925
##
## $pop.score$`250`
## 1 2 3
## 0.06468085 0.17878788 0.15754717
##
## $pop.score$`300`
## 1 2 3
## 0.03957447 0.09393939 0.09245283
##
## $pop.score$`350`
## 1 2 3
## 0.01446809 0.05757576 0.04056604
##
## $pop.score$`400`
## 1 2 3
## 0.002553191 0.003030303 0.004716981
##
##
## $mean
## 1 50 100 150 200 250
## 0.524680770 0.526863740 0.391849811 0.303472420 0.190529016 0.133671967
## 300 350 400
## 0.075322231 0.037536627 0.003433492
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400
##
## $pred$y
## [1] 0.540436866 0.539771052 0.539104713 0.538437271 0.537768147 0.537096766
## [7] 0.536422548 0.535744917 0.535063295 0.534377105 0.533685768 0.532988708
## [13] 0.532285347 0.531575106 0.530857410 0.530131679 0.529397337 0.528653806
## [19] 0.527900509 0.527136867 0.526362304 0.525576241 0.524778102 0.523967308
## [25] 0.523143283 0.522305448 0.521453226 0.520586039 0.519703310 0.518804462
## [31] 0.517888916 0.516956096 0.516005423 0.515036320 0.514048210 0.513040515
## [37] 0.512012658 0.510964060 0.509894144 0.508802334 0.507688050 0.506550716
## [43] 0.505389755 0.504204587 0.502994637 0.501759326 0.500498078 0.499210313
## [49] 0.497895455 0.496552927 0.495182335 0.493784028 0.492358539 0.490906401
## [55] 0.489428147 0.487924311 0.486395425 0.484842022 0.483264637 0.481663801
## [61] 0.480040048 0.478393912 0.476725924 0.475036620 0.473326530 0.471596190
## [67] 0.469846131 0.468076888 0.466288992 0.464482978 0.462659378 0.460818726
## [73] 0.458961554 0.457088396 0.455199785 0.453296255 0.451378337 0.449446566
## [79] 0.447501474 0.445543595 0.443573462 0.441591607 0.439598565 0.437594868
## [85] 0.435581049 0.433557642 0.431525179 0.429484194 0.427435220 0.425378790
## [91] 0.423315437 0.421245694 0.419170095 0.417089172 0.415003459 0.412913489
## [97] 0.410819795 0.408722910 0.406623367 0.404521699 0.402418363 0.400313503
## [103] 0.398207187 0.396099484 0.393990461 0.391880186 0.389768727 0.387656152
## [109] 0.385542530 0.383427927 0.381312412 0.379196053 0.377078917 0.374961073
## [115] 0.372842589 0.370723532 0.368603971 0.366483973 0.364363606 0.362242939
## [121] 0.360122039 0.358000973 0.355879811 0.353758620 0.351637468 0.349516423
## [127] 0.347395552 0.345274924 0.343154607 0.341034668 0.338915176 0.336796198
## [133] 0.334677802 0.332560057 0.330443030 0.328326790 0.326211403 0.324096938
## [139] 0.321983463 0.319871047 0.317759756 0.315649658 0.313540823 0.311433317
## [145] 0.309327208 0.307222565 0.305119456 0.303017948 0.300918109 0.298820007
## [151] 0.296723739 0.294629516 0.292537576 0.290448159 0.288361505 0.286277853
## [157] 0.284197442 0.282120511 0.280047300 0.277978048 0.275912995 0.273852379
## [163] 0.271796441 0.269745419 0.267699552 0.265659081 0.263624244 0.261595281
## [169] 0.259572431 0.257555933 0.255546027 0.253542952 0.251546948 0.249558253
## [175] 0.247577107 0.245603750 0.243638420 0.241681358 0.239732802 0.237792991
## [181] 0.235862166 0.233940564 0.232028427 0.230125992 0.228233500 0.226351190
## [187] 0.224479300 0.222618071 0.220767742 0.218928551 0.217100739 0.215284544
## [193] 0.213480207 0.211687965 0.209908060 0.208140729 0.206386213 0.204644750
## [199] 0.202916580 0.201201942 0.199501011 0.197813700 0.196139857 0.194479331
## [205] 0.192831971 0.191197623 0.189576138 0.187967362 0.186371146 0.184787336
## [211] 0.183215782 0.181656332 0.180108834 0.178573136 0.177049087 0.175536536
## [217] 0.174035330 0.172545319 0.171066349 0.169598271 0.168140932 0.166694180
## [223] 0.165257865 0.163831833 0.162415935 0.161010018 0.159613930 0.158227520
## [229] 0.156850636 0.155483127 0.154124841 0.152775626 0.151435331 0.150103804
## [235] 0.148780894 0.147466448 0.146160316 0.144862346 0.143572385 0.142290284
## [241] 0.141015888 0.139749049 0.138489612 0.137237428 0.135992344 0.134754208
## [247] 0.133522870 0.132298177 0.131079978 0.129868122 0.128662479 0.127463015
## [253] 0.126269717 0.125082575 0.123901575 0.122726706 0.121557956 0.120395313
## [259] 0.119238765 0.118088300 0.116943906 0.115805572 0.114673284 0.113547032
## [265] 0.112426803 0.111312586 0.110204368 0.109102137 0.108005882 0.106915591
## [271] 0.105831251 0.104752851 0.103680379 0.102613822 0.101553169 0.100498408
## [277] 0.099449528 0.098406515 0.097369358 0.096338045 0.095312565 0.094292904
## [283] 0.093279052 0.092270996 0.091268725 0.090272226 0.089281487 0.088296497
## [289] 0.087317243 0.086343714 0.085375898 0.084413782 0.083457355 0.082506605
## [295] 0.081561520 0.080622088 0.079688296 0.078760134 0.077837589 0.076920649
## [301] 0.076009292 0.075103458 0.074203075 0.073308074 0.072418383 0.071533932
## [307] 0.070654650 0.069780466 0.068911310 0.068047111 0.067187798 0.066333301
## [313] 0.065483548 0.064638470 0.063797996 0.062962054 0.062130575 0.061303487
## [319] 0.060480720 0.059662203 0.058847865 0.058037636 0.057231445 0.056429222
## [325] 0.055630895 0.054836395 0.054045649 0.053258589 0.052475142 0.051695238
## [331] 0.050918808 0.050145778 0.049376081 0.048609643 0.047846396 0.047086267
## [337] 0.046329188 0.045575085 0.044823890 0.044075532 0.043329939 0.042587041
## [343] 0.041846767 0.041109047 0.040373810 0.039640986 0.038910502 0.038182290
## [349] 0.037456278 0.036732396 0.036010577 0.035290776 0.034572951 0.033857062
## [355] 0.033143067 0.032430924 0.031720593 0.031012033 0.030305201 0.029600058
## [361] 0.028896562 0.028194671 0.027494344 0.026795541 0.026098220 0.025402339
## [367] 0.024707858 0.024014735 0.023322930 0.022632401 0.021943106 0.021255005
## [373] 0.020568056 0.019882218 0.019197450 0.018513711 0.017830960 0.017149155
## [379] 0.016468255 0.015788219 0.015109005 0.014430573 0.013752882 0.013075889
## [385] 0.012399555 0.011723837 0.011048694 0.010374086 0.009699971 0.009026308
## [391] 0.008353055 0.007680172 0.007007618 0.006335350 0.005663328 0.004991511
## [397] 0.004319857 0.003648325 0.002976875 0.002305464
##
##
## $best
## [1] 1
Rerun DAPC with optimum PCs (31)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#FFED7F", "#6a8fe0", "#FDCDAC", "#8c61cd", "#f365e7", "#871550","#f6c8de", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FFFF33", "#FF7F00","#2524f9", "#cddb9b","#799d10", "#CCCCCC", "#B3E2CD", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A","#FDC086", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5")
dapc2 <- dapc(albo1, var.contrib = TRUE, scale = FALSE, n.pca = 31, n.da = 9) #change PCs to the optimal
op <- par(cex = 0.65)
scatter(dapc2, pch = 20, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomright", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="bottomleft")
eig_percent <- round((dapc2$eig/(sum(dapc2$eig)))*100,2)
eig_percent #35.15 19.07 11.59 6.87 5.12 4.56 3.49 2.93 2.30 1.51 1.41 1.30 0.94 0.80 0.63 0.50 0.40 0.29 0.26 0.25 0.15 0.12 0.10 0.08 0.05 0.05 0.04 0.02 0.02 0.01 0.00
## [1] 35.15 19.07 11.59 6.87 5.12 4.56 3.49 2.93 2.30 1.51 1.41 1.30
## [13] 0.94 0.80 0.63 0.50 0.40 0.29 0.26 0.25 0.15 0.12 0.10 0.08
## [25] 0.05 0.05 0.04 0.02 0.02 0.01 0.00
Changing symbols for localities
good.shapes = c(1:25)
op <- par(cex = 0.65)
scatter(dapc2, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomright", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="bottomleft")
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/pops_4fst.txt \
--make-bed \
--out output/dapc/dapc_01_b \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc_01_b.log
20968 variants loaded from .bim file. –keep-fam: 408 people remaining. Total genotyping rate in remaining samples is 0.971028. 20968 variants and 408 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/dapc_01_b \
--recodeA \
--out output/dapc/dapc_01_b \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc_01_b.log
20968 variants loaded from .bim file. 20968 variants and 408 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/dapc/dapc_01_b.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save the genind object
Load the genind object
albo1 <- readRDS(here(
"scripts", "RMarkdowns", "output", "europe", "dapc", "albo1_MAF01.rds"
))
albo1
## /// GENIND OBJECT /////////
##
## // 408 individuals; 20,968 loci; 41,936 alleles; size: 77.1 Mb
##
## // Basic content
## @tab: 408 x 41936 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 2-2)
## @loc.fac: locus factor for the 41936 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: df2genind(X = xx[, ], sep = "/", ncode = 1, ind.names = x@ind.names,
## pop = x@pop, NA.char = "-", ploidy = 2)
##
## // Optional content
## @pop: population of each individual (group size range: 4-16)
## @other: a list containing: sex phenotype pat mat
Find clusters
grp <- find.clusters(albo1, max.n.clust=40)
#retained 400 PCs
#Choose the number of clusters (>=2): 3
Save it
To load it
## [1] "Kstat" "stat" "grp" "size"
## [1] 68 106 234
##
## 1 2 3
## ALD 0 0 10
## ALU 0 12 0
## ALV 0 0 12
## ARM 0 10 0
## BAR 0 0 12
## BRE 13 0 0
## BUL 0 0 10
## CES 14 0 0
## CRO 0 0 12
## DES 16 0 0
## FRS 0 0 12
## GES 0 12 0
## GRA 0 0 11
## GRC 0 0 10
## IMP 0 0 4
## ITB 2 0 3
## ITP 0 0 8
## ITR 0 0 12
## KER 0 12 0
## KRA 0 12 0
## MAL 0 0 12
## POP 2 0 10
## RAR 0 12 0
## ROM 0 0 4
## ROS 0 0 11
## SER 0 0 4
## SEV 0 12 0
## SIC 0 0 9
## SLO 0 0 12
## SOC 0 12 0
## SPB 8 0 0
## SPC 6 0 0
## SPM 4 0 2
## SPS 0 0 8
## STS 0 0 12
## TIK 0 12 0
## TIR 0 0 4
## TRE 3 0 9
## TUA 0 0 9
## TUH 0 0 12
Save it
To load it
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.genind(x = albo1, pop = grp$grp)
##
## $n.pca: 400 first PCs of PCA used
## $n.da: 2 discriminant functions saved
## $var (proportion of conserved variance): 0.998
##
## $eig (eigenvalues): 59260000 2313000 vector length content
## 1 $eig 2 eigenvalues
## 2 $grp 408 prior group assignment
## 3 $prior 3 prior group probabilities
## 4 $assign 408 posterior group assignment
## 5 $pca.cent 41936 centring vector of PCA
## 6 $pca.norm 41936 scaling vector of PCA
## 7 $pca.eig 407 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 408 400 retained PCs of PCA
## 2 $means 3 400 group means
## 3 $loadings 400 2 loadings of variables
## 4 $ind.coord 408 2 coordinates of individuals (principal components)
## 5 $grp.coord 3 2 coordinates of groups
## 6 $posterior 408 3 posterior membership probabilities
## 7 $pca.loadings 41936 400 PCA loadings of original variables
## 8 $var.contr 41936 2 contribution of original variables
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=10) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## 1 2 3
## 0.6176471 0.9339623 0.0000000
##
## $pop.score$`50`
## 1 2 3
## 0.7558824 0.7047170 0.1123932
##
## $pop.score$`100`
## 1 2 3
## 0.5632353 0.5075472 0.1576923
##
## $pop.score$`150`
## 1 2 3
## 0.3750000 0.3867925 0.1435897
##
## $pop.score$`200`
## 1 2 3
## 0.22794118 0.25943396 0.09957265
##
## $pop.score$`250`
## 1 2 3
## 0.15000000 0.15000000 0.06111111
##
## $pop.score$`300`
## 1 2 3
## 0.12500000 0.09245283 0.04572650
##
## $pop.score$`350`
## 1 2 3
## 0.05147059 0.05000000 0.02393162
##
## $pop.score$`400`
## 1 2 3
## 0.002941176 0.006603774 0.002991453
##
##
## $mean
## 1 50 100 150 200 250
## 0.517203108 0.524330832 0.409491591 0.301794065 0.195649263 0.120370370
## 300 350 400
## 0.087726442 0.041800737 0.004178801
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## [307] 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
## [325] 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
## [343] 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
## [361] 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## [379] 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## [397] 397 398 399 400
##
## $pred$y
## [1] 0.517203108 0.518005371 0.518806173 0.519603859 0.520396778 0.521183274
## [7] 0.521961695 0.522730386 0.523487695 0.524231968 0.524961551 0.525674791
## [13] 0.526370034 0.527045628 0.527699917 0.528331249 0.528937970 0.529518427
## [19] 0.530070967 0.530593935 0.531085678 0.531544542 0.531968875 0.532357022
## [25] 0.532707331 0.533018146 0.533287816 0.533514687 0.533697104 0.533833415
## [31] 0.533921966 0.533961103 0.533949173 0.533884522 0.533765497 0.533590445
## [37] 0.533357711 0.533065643 0.532712586 0.532296888 0.531816894 0.531270952
## [43] 0.530657407 0.529974606 0.529220896 0.528394623 0.527494134 0.526517775
## [49] 0.525463892 0.524330832 0.523117576 0.521825643 0.520457186 0.519014358
## [55] 0.517499313 0.515914203 0.514261182 0.512542403 0.510760019 0.508916184
## [61] 0.507013051 0.505052772 0.503037502 0.500969393 0.498850599 0.496683273
## [67] 0.494469568 0.492211637 0.489911634 0.487571711 0.485194023 0.482780721
## [73] 0.480333960 0.477855893 0.475348673 0.472814453 0.470255386 0.467673626
## [79] 0.465071325 0.462450638 0.459813717 0.457162715 0.454499786 0.451827083
## [85] 0.449146759 0.446460968 0.443771862 0.441081594 0.438392319 0.435706189
## [91] 0.433025358 0.430351979 0.427688204 0.425036187 0.422398082 0.419776042
## [97] 0.417172219 0.414588768 0.412027840 0.409491591 0.406981692 0.404497901
## [103] 0.402039493 0.399605744 0.397195932 0.394809333 0.392445222 0.390102876
## [109] 0.387781571 0.385480585 0.383199192 0.380936670 0.378692295 0.376465343
## [115] 0.374255091 0.372060814 0.369881789 0.367717293 0.365566602 0.363428992
## [121] 0.361303739 0.359190120 0.357087412 0.354994890 0.352911830 0.350837510
## [127] 0.348771206 0.346712193 0.344659749 0.342613149 0.340571670 0.338534588
## [133] 0.336501180 0.334470721 0.332442489 0.330415760 0.328389809 0.326363914
## [139] 0.324337350 0.322309394 0.320279322 0.318246411 0.316209937 0.314169177
## [145] 0.312123405 0.310071900 0.308013937 0.305948793 0.303875744 0.301794065
## [151] 0.299703235 0.297603524 0.295495408 0.293379359 0.291255850 0.289125355
## [157] 0.286988346 0.284845297 0.282696680 0.280542970 0.278384639 0.276222160
## [163] 0.274056007 0.271886652 0.269714569 0.267540231 0.265364111 0.263186682
## [169] 0.261008417 0.258829790 0.256651274 0.254473342 0.252296466 0.250121121
## [175] 0.247947779 0.245776913 0.243608997 0.241444504 0.239283906 0.237127678
## [181] 0.234976292 0.232830221 0.230689939 0.228555918 0.226428632 0.224308554
## [187] 0.222196157 0.220091915 0.217996300 0.215909785 0.213832844 0.211765950
## [193] 0.209709576 0.207664195 0.205630281 0.203608305 0.201598743 0.199602066
## [199] 0.197618748 0.195649263 0.193694043 0.191753367 0.189827472 0.187916596
## [205] 0.186020978 0.184140856 0.182276468 0.180428051 0.178595845 0.176780086
## [211] 0.174981013 0.173198865 0.171433879 0.169686293 0.167956346 0.166244275
## [217] 0.164550319 0.162874716 0.161217703 0.159579520 0.157960403 0.156360592
## [223] 0.154780323 0.153219836 0.151679368 0.150159158 0.148659443 0.147180462
## [229] 0.145722453 0.144285653 0.142870301 0.141476635 0.140104894 0.138755314
## [235] 0.137428134 0.136123593 0.134841929 0.133583378 0.132348181 0.131136574
## [241] 0.129948796 0.128785084 0.127645678 0.126530814 0.125440732 0.124375669
## [247] 0.123335863 0.122321553 0.121332976 0.120370370 0.119433791 0.118522562
## [253] 0.117635820 0.116772707 0.115932361 0.115113921 0.114316528 0.113539320
## [259] 0.112781437 0.112042018 0.111320203 0.110615131 0.109925942 0.109251775
## [265] 0.108591769 0.107945064 0.107310799 0.106688114 0.106076147 0.105474040
## [271] 0.104880930 0.104295957 0.103718261 0.103146981 0.102581256 0.102020226
## [277] 0.101463031 0.100908809 0.100356701 0.099805844 0.099255380 0.098704447
## [283] 0.098152185 0.097597733 0.097040231 0.096478818 0.095912633 0.095340815
## [289] 0.094762505 0.094176842 0.093582965 0.092980013 0.092367126 0.091743443
## [295] 0.091108104 0.090460247 0.089799014 0.089123542 0.088432972 0.087726442
## [301] 0.087003323 0.086263903 0.085508705 0.084738248 0.083953051 0.083153636
## [307] 0.082340523 0.081514232 0.080675283 0.079824197 0.078961494 0.078087693
## [313] 0.077203316 0.076308883 0.075404914 0.074491928 0.073570448 0.072640991
## [319] 0.071704080 0.070760234 0.069809974 0.068853819 0.067892291 0.066925909
## [325] 0.065955193 0.064980664 0.064002843 0.063022248 0.062039402 0.061054823
## [331] 0.060069033 0.059082551 0.058095898 0.057109594 0.056124159 0.055140114
## [337] 0.054157979 0.053178274 0.052201519 0.051228234 0.050258941 0.049294159
## [343] 0.048334408 0.047380209 0.046432083 0.045490548 0.044556126 0.043629336
## [349] 0.042710700 0.041800737 0.040899851 0.040007973 0.039124921 0.038250509
## [355] 0.037384552 0.036526868 0.035677271 0.034835577 0.034001601 0.033175160
## [361] 0.032356069 0.031544142 0.030739197 0.029941049 0.029149512 0.028364404
## [367] 0.027585539 0.026812733 0.026045802 0.025284561 0.024528826 0.023778413
## [373] 0.023033137 0.022292814 0.021557259 0.020826288 0.020099717 0.019377361
## [379] 0.018659036 0.017944558 0.017233741 0.016526402 0.015822357 0.015121421
## [385] 0.014423409 0.013728137 0.013035422 0.012345077 0.011656920 0.010970765
## [391] 0.010286429 0.009603726 0.008922473 0.008242486 0.007563579 0.006885568
## [397] 0.006208269 0.005531498 0.004855070 0.004178801
##
##
## $best
## [1] 32
Rerun DAPC with optimum PCs (33)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#FFED7F", "#6a8fe0", "#FDCDAC", "#8c61cd", "#f365e7", "#871550","#f6c8de", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FFFF33", "#FF7F00","#2524f9", "#cddb9b","#799d10", "#CCCCCC", "#B3E2CD", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A","#FDC086", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5")
dapc2 <- dapc(albo1, var.contrib = TRUE, scale = FALSE, n.pca = 33, n.da = 9) #change PCs to the optimal
op <- par(cex = 0.65)
scatter(dapc2, pch = 20, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomright", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="bottomleft")
## [1] 34.21 19.62 11.20 6.77 4.96 4.59 3.56 3.08 2.30 1.88 1.49 1.34
## [13] 0.95 0.80 0.74 0.45 0.38 0.33 0.30 0.25 0.16 0.13 0.13 0.10
## [25] 0.08 0.06 0.05 0.04 0.03 0.02 0.01 0.00 0.00
Changing symbols for localities
good.shapes = c(1:25)
op <- par(cex = 0.65)
scatter(dapc2, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomright", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="bottomleft")
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.1_b \
--make-bed \
--out output/dapc/dapc_01b \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc_01b.log
66317 variants loaded from .bim file. 66317 variants and 688 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/dapc_01b \
--recodeA \
--out output/dapc/dapc_01b \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc_01b.log
66317 variants loaded from .bim file. 66317 variants and 688 people pass filters and QC.
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/dapc_01b.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
nInd(snp)
nLoc(snp)
nPop(snp)
indNames(snp)
# convert to genind
snp2 <- gl2gi(snp, probar = TRUE, verbose = NULL)
Save it
To load it
Import sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
Load the csv
countr <- read.csv(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "DAPC_countries.csv"
))
df <- as.data.frame(countr)
head(df)
## pop country
## 1 OKI Japan
## 2 OKI Japan
## 3 OKI Japan
## 4 OKI Japan
## 5 OKI Japan
## 6 OKI Japan
## [1] Japan Japan Japan Japan Japan
## [6] Japan Japan Japan Japan Japan
## [11] Japan Japan China China China
## [16] China China China China China
## [21] China China China China Russia
## [26] Russia Russia Russia Russia Russia
## [31] Russia Russia Russia Russia Russia
## [36] Russia Ukraine Ukraine Ukraine Ukraine
## [41] Ukraine Ukraine Ukraine Ukraine Ukraine
## [46] Ukraine Ukraine Ukraine China China
## [51] China China China China China
## [56] China China Nepal Nepal Nepal
## [61] Nepal Georgia Georgia Georgia Georgia
## [66] Georgia Georgia Georgia Georgia Georgia
## [71] Georgia Georgia Georgia Ukraine Ukraine
## [76] Ukraine Ukraine Ukraine Ukraine Ukraine
## [81] Ukraine Ukraine Ukraine Ukraine Ukraine
## [86] Russia Russia Russia Russia Russia
## [91] Russia Russia Russia Russia Russia
## [96] Russia Russia Russia Russia Russia
## [101] Russia Russia Russia Russia Russia
## [106] Russia Russia Russia Russia Russia
## [111] Russia Russia Russia Taiwan Taiwan
## [116] Taiwan Taiwan Taiwan Taiwan Taiwan
## [121] China China China China China
## [126] China China China China China
## [131] China China Vietnam Vietnam Vietnam
## [136] Vietnam Italy Italy Italy Italy
## [141] Italy Italy Italy Italy Italy
## [146] Italy Italy Italy Ukraine Ukraine
## [151] Ukraine Ukraine Ukraine Ukraine Ukraine
## [156] Ukraine Ukraine Ukraine Ukraine Ukraine
## [161] Malaysia Malaysia Malaysia Malaysia France
## [166] France France France France France
## [171] France France Italy Italy Italy
## [176] Italy Italy Italy Italy Italy
## [181] Italy Vietnam Vietnam Vietnam Vietnam
## [186] Vietnam Vietnam Vietnam Vietnam Vietnam
## [191] Vietnam Vietnam Vietnam Vietnam Vietnam
## [196] Vietnam Vietnam Vietnam Vietnam Thailand
## [201] Thailand Thailand Thailand Thailand Thailand
## [206] Thailand Thailand Thailand Thailand Thailand
## [211] Thailand Thailand Thailand Thailand Thailand
## [216] Thailand Thailand Thailand Thailand Thailand
## [221] Thailand Italy Italy Italy Italy
## [226] Italy Italy Italy Japan Japan
## [231] Japan Japan Japan Japan Japan
## [236] Japan Japan Japan Japan Japan
## [241] Japan Japan Japan Japan Japan
## [246] Japan Japan Japan Japan Japan
## [251] Japan France France France France
## [256] Italy Italy Italy Italy Italy
## [261] Italy Italy Italy Italy Italy
## [266] Italy Italy Italy Italy Italy
## [271] Italy Italy Italy Italy Italy
## [276] Italy Italy Italy Italy Russia
## [281] Russia Russia Russia Russia Russia
## [286] Russia Russia Brazil Brazil Brazil
## [291] Brazil Brazil Brazil Brazil Brazil
## [296] Brazil Brazil Brazil Albania Albania
## [301] Albania Albania Italy Italy Italy
## [306] Italy Italy Italy Italy Italy
## [311] Cambodia Cambodia Cambodia Cambodia Cambodia
## [316] Cambodia Japan Cambodia Cambodia Cambodia
## [321] Cambodia Cambodia Cambodia Italy Italy
## [326] Italy Japan Italy Italy Italy
## [331] Italy Italy Japan Greece Japan
## [336] Italy Italy Italy Italy Japan
## [341] Japan Japan Japan Japan Japan
## [346] Japan Japan India India India
## [351] India India India India India
## [356] India India India India Thailand
## [361] Thailand Thailand Thailand Thailand Thailand
## [366] Thailand Thailand Thailand Spain Spain
## [371] Spain Spain Spain Spain Spain
## [376] Spain Spain Spain Spain Spain
## [381] USA USA USA USA USA
## [386] USA USA USA USA USA
## [391] USA USA USA USA USA
## [396] USA USA USA USA USA
## [401] USA USA USA Bhutan Bhutan
## [406] Nepal Nepal Sri\xa0Lanka Sri\xa0Lanka Thailand
## [411] Thailand Thailand Thailand Thailand Thailand
## [416] Thailand Thailand Thailand Thailand Thailand
## [421] Indonesia Indonesia Indonesia Indonesia Indonesia
## [426] Indonesia Indonesia Indonesia Indonesia Indonesia
## [431] Indonesia Indonesia Maldives Maldives Maldives
## [436] Maldives Brazil Brazil Brazil Brazil
## [441] Brazil Brazil Brazil Brazil Brazil
## [446] Brazil Brazil Brazil Bulgaria Bulgaria
## [451] Bulgaria Bulgaria Bulgaria Bulgaria Bulgaria
## [456] Bulgaria Bulgaria Bulgaria Croatia Croatia
## [461] Croatia Croatia Croatia Croatia Croatia
## [466] Croatia Croatia Croatia Croatia Croatia
## [471] Greece Greece Greece Greece Greece
## [476] Greece Greece Greece Greece Greece
## [481] Greece Greece Greece Greece Greece
## [486] Greece Greece Greece Greece Greece
## [491] Italy Italy Italy Italy Italy
## [496] Malta Malta Malta Malta Malta
## [501] Malta Malta Malta Malta Malta
## [506] Malta Malta Spain Spain Spain
## [511] Spain Spain Turkey Turkey Turkey
## [516] Turkey Turkey Turkey Turkey Turkey
## [521] Turkey Turkey Turkey Turkey Turkey
## [526] Turkey Turkey Turkey Turkey Turkey
## [531] Turkey Turkey Turkey Albania Albania
## [536] Albania Albania Albania Albania Albania
## [541] Albania Albania Albania France France
## [546] France France France France France
## [551] France France France France France
## [556] Italy Italy Italy Italy Italy
## [561] Italy Italy Italy Italy Portugal
## [566] Portugal Portugal Portugal Portugal Portugal
## [571] Portugal Portugal Portugal Portugal Portugal
## [576] Portugal Portugal Portugal Romania Romania
## [581] Romania Romania Romania Romania Romania
## [586] Romania Romania Romania Romania Serbia
## [591] Serbia Serbia Serbia Slovenia Slovenia
## [596] Slovenia Slovenia Slovenia Slovenia Slovenia
## [601] Slovenia Slovenia Slovenia Slovenia Slovenia
## [606] Spain Spain Spain Spain Spain
## [611] Spain Spain Spain Spain Spain
## [616] Spain Spain Spain Spain Spain
## [621] Spain Spain Spain Armenia Armenia
## [626] Armenia Armenia Armenia Armenia Armenia
## [631] Armenia Armenia Armenia Spain Spain
## [636] Spain Spain Albania Albania Albania
## [641] Albania Albania Albania Albania Albania
## [646] Albania Albania Albania Albania Italy
## [651] Italy Italy Italy Italy Italy
## [656] Italy Italy Italy Italy Italy
## [661] Italy Indonesia Indonesia Indonesia Indonesia
## [666] Indonesia Indonesia Indonesia Indonesia Indonesia
## [671] Indonesia Indonesia Indonesia Indonesia Indonesia
## [676] Indonesia Malaysia Malaysia Malaysia Malaysia
## [681] Malaysia Malaysia Malaysia Malaysia Malaysia
## [686] Malaysia Malaysia Malaysia
## 32 Levels: Albania Armenia Bhutan Brazil Bulgaria Cambodia China ... Vietnam
Save the genind object
saveRDS(snp2, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "snp_country.rds"
))
Load the genind object
Scale
## [1] "matrix" "array"
## [1] 688 132634
## AX-583033342_C.C AX-583033342_C.G AX-583035163_A.A AX-583035163_A.T
## 1001 0.3976471 -0.3976471 0.5372938 -0.5372938
## 1002 0.3976471 -0.3976471 0.5372938 -0.5372938
## 1003 0.3976471 -0.3976471 0.5372938 -0.5372938
## 1004 0.3976471 -0.3976471 0.5372938 -0.5372938
## 1005 0.3976471 -0.3976471 0.5372938 -0.5372938
## AX-583033370_G.T
## 1001 1.0219800
## 1002 0.0000000
## 1003 -0.2722724
## 1004 -0.2722724
## 1005 -0.2722724
grp <- find.clusters(snp_country, max.n.clust=25)
#retained 600
#Choose the number of clusters (>=2): 6
Save the genind object
Load the genind object
## [1] "Kstat" "stat" "grp" "size"
## [1] 134 59 144 106 216 29
##
## 1 2 3 4 5 6
## Albania 0 26 0 0 0 0
## Armenia 0 0 0 10 0 0
## Bhutan 2 0 0 0 0 0
## Brazil 12 0 11 0 0 0
## Bulgaria 0 0 0 0 10 0
## Cambodia 12 0 0 0 0 0
## China 13 0 0 0 20 0
## Croatia 0 12 0 0 0 0
## France 0 0 0 0 24 0
## Georgia 0 0 0 12 0 0
## Greece 0 21 0 0 0 0
## India 12 0 0 0 0 0
## Indonesia 0 0 0 0 0 27
## Italy 0 0 49 0 49 0
## Japan 0 0 35 0 12 0
## Malaysia 16 0 0 0 0 0
## Maldives 4 0 0 0 0 0
## Malta 0 0 0 0 12 0
## Nepal 4 0 0 0 0 2
## Portugal 0 0 9 0 5 0
## Romania 0 0 0 0 11 0
## Russia 0 0 0 48 0 0
## Serbia 0 0 0 0 4 0
## Slovenia 0 0 0 0 12 0
## Spain 0 0 17 0 22 0
## Sri\xa0Lanka 2 0 0 0 0 0
## Taiwan 0 0 0 0 7 0
## Thailand 42 0 0 0 0 0
## Turkey 0 0 0 0 21 0
## Ukraine 0 0 0 36 0 0
## USA 0 0 23 0 0 0
## Vietnam 15 0 0 0 7 0
table.value(table(pop(snp2), grp$grp), col.lab=paste("inf", 1:6), #inferred groups
row.lab=paste("ori", 1:32)) #original groups - 32 countries
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc1.rds"
))
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.data.frame(x = as.data.frame(x), grp = ..1)
##
## $n.pca: 100 first PCs of PCA used
## $n.da: 5 discriminant functions saved
## $var (proportion of conserved variance): 0.399
##
## $eig (eigenvalues): 12760 6391 3147 2836 1369 vector length content
## 1 $eig 5 eigenvalues
## 2 $grp 688 prior group assignment
## 3 $prior 6 prior group probabilities
## 4 $assign 688 posterior group assignment
## 5 $pca.cent 132634 centring vector of PCA
## 6 $pca.norm 132634 scaling vector of PCA
## 7 $pca.eig 687 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 688 100 retained PCs of PCA
## 2 $means 6 100 group means
## 3 $loadings 100 5 loadings of variables
## 4 $ind.coord 688 5 coordinates of individuals (principal components)
## 5 $grp.coord 6 5 coordinates of groups
## 6 $posterior 688 6 posterior membership probabilities
## 7 $pca.loadings 132634 100 PCA loadings of original variables
## 8 $var.contr 132634 5 contribution of original variables
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp_country, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 30, xval.plot = TRUE)
Number of PCs Achieving Highest Mean Success
“140”
Number of PCs Achieving Lowest MSE
“140” n.pca: 140 first
PCs of PCA used n.da: 31 discriminant functions saved var (proportion of
conserved variance): 0.469
So the cross-validation gives us a very different # of PCAs to retain (140) compared to the a-score (13)
Run DAPC with object
$n.pca: 13 first PCs of PCA used $n.da: 5 discriminant functions saved $var (proportion of conserved variance): 0.156
Save it
saveRDS(
dapc_snp1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_snp1.rds"
)
)
To load it
dapc_snp1 <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_snp1.rds"
)
)
Most contributing alleles
Run DAPC with object using #pcs from cross-validation
$n.pca: 140 first PCs of PCA used $n.da: 31 discriminant functions saved $var (proportion of conserved variance): 0.469
Save it
saveRDS(
dapc_snp2, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_snp2.rds"
)
)
To load it
dapc_snp2 <- readRDS(
here("scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1", "dapc_snp2.rds"))
grp <- dapc_snp2$grp
levels(grp) <- iconv(levels(grp), to = "ASCII//TRANSLIT")
levels(grp) <- gsub("[^[:alnum:][:space:]]", "", levels(grp))
grp <- factor(grp)
dapc_snp2$grp <- grp
#dapc_snp2$grp <- iconv(dapc_snp2$grp, to = "ASCII//TRANSLIT")
#dapc_snp2$grp <- gsub("[^[:alnum:][:space:]]", "", dapc_snp2$grp)
#Sys.setlocale("LC_ALL", "C")
#dapc_snp2 <- dapc_snp2$label <- iconv(dapc_snp2$label, to = "ASCII//TRANSLIT")
scatter(dapc_snp2)
grp <- dapc_snp1$grp
levels(grp) <- iconv(levels(grp), to = "ASCII//TRANSLIT")
levels(grp) <- gsub("[^[:alnum:][:space:]]", "", levels(grp))
grp <- factor(grp)
dapc_snp1$grp <- grp
scatter(dapc_snp1)
Even highest contributors have VERY low loadings though, so no one variant is driving the pattern
myCol2 <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,35:38,43,60,62:64)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
## Warning: Removed 671 rows containing missing values or values outside the scale range
## (`geom_point()`).
Plot using different discriminant functions
PCs 1 & 2
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/dapc_euro_global_r1_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp1, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp1, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
These are the plots I kept for results
Run DAPC with object
Save it
saveRDS(
dapc_snp3, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_snp3.rds"
)
)
To load it
dapc_snp3 <- readRDS(
here("scripts", "RMarkdowns", "output", "euro_global", "dapc", "MAF_1", "dapc_snp3.rds"))
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/dapc3_euro_global_r1_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
grp <- dapc_snp3$grp
levels(grp) <- iconv(levels(grp), to = "ASCII//TRANSLIT")
levels(grp) <- gsub("[^[:alnum:][:space:]]", "", levels(grp))
grp <- factor(grp)
dapc_snp3$grp <- grp
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
Try new colors - by region
myCol2 <- c ("#a113b2", "goldenrod", "#146c45", "#66C2A5", "goldenrod", "#2524f9", "#c41A1C", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#146c45", "#2524f9", "#a113b2", "#c41A1C", "#2524f9", "#146c45", "#a113b2", "#146c45", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#146c45", "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "#66C2A5", "#2524f9")
# "#a41415"
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/dapc_euro_global_region_r1_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 3
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/dapc_euro_global_region_r1_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
PCs 1 & 4
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/dapc_euro_global_region_r1_PC1_4.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:25,35:38,43,60,62:64)
op <- par(cex = 0.39)
scatter(dapc_snp3, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)
Create files
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_italy_all_and_US.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--export vcf \
--out output/dapc/dapc_italy_all_and_US \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc_italy_all_and_US.log
22642 variants loaded from .bim file. –keep-fam: 353 people remaining. Total genotyping rate in remaining samples is 0.965364. 22642 variants and 353 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/dapc_italy_all_and_US \
--recodeA \
--out output/dapc/dapc_italy_all_and_US \
--silent;
grep 'samples\|variants\|remaining' output/dapc/dapc_italy_all_and_US.log
22642 variants loaded from .bim file. 22642 variants and 353 people pass filters and QC.
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/dapc_italy_all_and_US.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## [1] 353
## [1] 22642
## [1] 40
## [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
## [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
## [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
## [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
## [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
## [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
## [61] "1185" "1186" "1187" "1188" "1189" "1190" "1191" "1192" "1193" "1194"
## [71] "1195" "1201" "1214" "1215" "1216" "1217" "1226" "1227" "1228" "1229"
## [81] "1230" "1232" "1233" "1234" "1237" "1238" "1239" "1240" "1241" "1242"
## [91] "1243" "1244" "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253"
## [101] "1254" "1255" "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263"
## [111] "1264" "1265" "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274"
## [121] "1276" "1282" "1283" "1285" "1286" "1287" "1288" "1289" "1292" "1293"
## [131] "1294" "1295" "1325" "1326" "1328" "1329" "1330" "1331" "1332" "1333"
## [141] "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377" "1378" "1379"
## [151] "1380" "1381" "1382" "1383" "1384" "1430" "1431" "1432" "1433" "1434"
## [161] "1435" "1436" "1437" "1438" "1439" "1440" "1441" "1443" "1444" "1446"
## [171] "1447" "1449" "1451" "1452" "1454" "1456" "1458" "1460" "1461" "197"
## [181] "198" "199" "200" "201" "202" "203" "204" "2174" "2175" "2176"
## [191] "2177" "2178" "2179" "217" "2180" "2181" "2182" "2183" "2184" "2185"
## [201] "2187" "2188" "2189" "218" "2191" "2192" "2193" "2194" "2195" "219"
## [211] "220" "2215" "2216" "2217" "2218" "221" "222" "223" "224" "225"
## [221] "226" "227" "230" "255" "256" "257" "258" "261" "262" "263"
## [231] "264" "265" "266" "267" "268" "269" "270" "271" "272" "273"
## [241] "275" "276" "277" "278" "294" "295" "296" "297" "298" "299"
## [251] "301" "302" "303" "304" "305" "435" "436" "437" "438" "439"
## [261] "440" "441" "442" "443" "444" "445" "446" "602" "603" "604"
## [271] "607" "609" "610" "623" "624" "625" "626" "627" "628" "629"
## [281] "630" "631" "632" "633" "666" "669" "670" "671" "672" "673"
## [291] "674" "675" "676" "677" "678" "679" "680" "681" "682" "683"
## [301] "747" "749" "750" "751" "752" "824" "825" "826" "827" "829"
## [311] "830" "831" "833" "834" "928" "929" "930" "931" "932" "933"
## [321] "934" "935" "936" "937" "938" "939" "964" "965" "966" "967"
## [331] "972" "973" "975" "976" "977" "978" "979" "980" "981" "982"
## [341] "983" "984" "985" "986" "987" "988" "989" "990" "991" "992"
## [351] "993" "994" "995"
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save it
To load it
Scale
I get a warning saying “Some scaling values are null. Corresponding alleles are removed”. It seems this error is because one or more allele is fixed in this subset of populations. These were removed.
Save it
To load it
## [1] 353 45280
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001 0.8736826 -0.8736826 0.9129240 -0.9129240
## 1002 0.0000000 0.0000000 -1.4519198 1.4519198
## 1003 -0.4541610 0.4541610 -1.4519198 1.4519198
## 1004 -0.4541610 0.4541610 -1.4519198 1.4519198
## 1005 -0.4541610 0.4541610 -0.2694979 0.2694979
## AX-583036983_C.T
## 1001 1.21831144
## 1002 -0.07035841
## 1003 1.21831144
## 1004 -0.07035841
## 1005 1.21831144
Save it
To load it
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp3, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 40, xval.plot = TRUE)
Number of PCs Achieving Highest Mean
Success"60" Number of PCs Achieving Lowest MSE
“60” $n.pca:
60 first PCs of PCA used $n.da: 39 discriminant functions saved $var
(proportion of conserved variance): 0.408
Run dapc using these #s
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "dapc1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=10) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## BEN BER BRE CAM CES CHA
## 0.91666667 0.75000000 -0.06153846 0.50000000 0.40714286 0.10833333
## DES GEL HAI HAN HOC HUN
## 0.05000000 0.00000000 0.58333333 1.00000000 0.14285714 0.66666667
## IMP INJ INW ITB ITP ITR
## 0.00000000 0.90909091 1.00000000 0.00000000 0.66666667 0.64166667
## JAF KAC KAG KAN KAT KLP
## 0.00000000 0.00000000 0.42500000 0.18181818 0.00000000 0.00000000
## KUN LAM MAT OKI PAL QNC
## 0.00000000 0.00000000 0.66666667 0.75000000 0.20000000 -0.05454545
## ROM SIC SON SSK SUF SUU
## 0.00000000 0.22222222 0.00000000 0.36666667 0.00000000 1.00000000
## TAI TRE UTS YUN
## 0.00000000 0.16666667 0.00000000 0.00000000
##
## $pop.score$`5`
## BEN BER BRE CAM CES CHA
## 0.78333333 0.98333333 0.63846154 0.58333333 0.94285714 0.45833333
## DES GEL HAI HAN HOC HUN
## 0.61250000 -0.05000000 0.54166667 0.97500000 0.24285714 0.98333333
## IMP INJ INW ITB ITP ITR
## 0.20000000 0.98181818 0.90000000 0.10000000 0.87777778 1.00000000
## JAF KAC KAG KAN KAT KLP
## -0.15000000 -0.05000000 0.91666667 0.60909091 0.00000000 -0.05000000
## KUN LAM MAT OKI PAL QNC
## 0.87500000 -0.06666667 0.76666667 0.66666667 0.95454545 0.95454545
## ROM SIC SON SSK SUF SUU
## -0.07500000 0.43333333 -0.10000000 0.61666667 0.30000000 0.98333333
## TAI TRE UTS YUN
## 0.78571429 0.69166667 0.84166667 -0.02222222
##
## $pop.score$`10`
## BEN BER BRE CAM CES CHA
## 0.91666667 0.97500000 0.82307692 0.64166667 0.82857143 0.74166667
## DES GEL HAI HAN HOC HUN
## 0.81250000 -0.30000000 0.65833333 0.82500000 0.60000000 0.91666667
## IMP INJ INW ITB ITP ITR
## 0.55000000 0.91818182 0.87500000 0.52000000 0.93333333 0.64166667
## JAF KAC KAG KAN KAT KLP
## -0.30000000 -0.08333333 0.92500000 0.63636364 -0.06666667 0.17500000
## KUN LAM MAT OKI PAL QNC
## 0.82500000 -0.02222222 0.77500000 0.94166667 0.88181818 0.89090909
## ROM SIC SON SSK SUF SUU
## 0.42500000 0.37777778 -0.20000000 0.53333333 0.23333333 0.95000000
## TAI TRE UTS YUN
## 0.94285714 0.74166667 0.84166667 -0.03333333
##
## $pop.score$`15`
## BEN BER BRE CAM CES CHA
## 0.91666667 0.87500000 0.76923077 0.60000000 0.83571429 0.65833333
## DES GEL HAI HAN HOC HUN
## 0.79375000 0.75000000 0.70833333 0.80000000 0.78571429 0.90000000
## IMP INJ INW ITB ITP ITR
## 0.30000000 0.92727273 0.82500000 0.70000000 0.90000000 0.64166667
## JAF KAC KAG KAN KAT KLP
## -0.30000000 -0.08333333 0.90000000 0.82727273 0.08333333 0.82500000
## KUN LAM MAT OKI PAL QNC
## 0.75000000 0.47777778 0.75000000 0.92500000 0.85454545 0.95454545
## ROM SIC SON SSK SUF SUU
## 0.40000000 0.37777778 -0.23333333 0.45000000 0.48333333 0.93333333
## TAI TRE UTS YUN
## 0.85714286 0.58333333 0.92500000 0.18888889
##
## $pop.score$`20`
## BEN BER BRE CAM CES CHA
## 0.90833333 0.82500000 0.79230769 0.63333333 0.86428571 0.54166667
## DES GEL HAI HAN HOC HUN
## 0.84375000 0.50000000 0.45833333 0.80000000 0.88571429 0.77500000
## IMP INJ INW ITB ITP ITR
## 0.25000000 0.88181818 0.75000000 0.58000000 0.90000000 0.75833333
## JAF KAC KAG KAN KAT KLP
## -0.30000000 0.18333333 0.88333333 0.84545455 0.50000000 0.75000000
## KUN LAM MAT OKI PAL QNC
## 0.77500000 0.57777778 0.74166667 0.89166667 0.90909091 0.82727273
## ROM SIC SON SSK SUF SUU
## 0.37500000 0.43333333 -0.06666667 0.27500000 0.61666667 0.81666667
## TAI TRE UTS YUN
## 0.85714286 0.74166667 0.93333333 0.11111111
##
## $pop.score$`25`
## BEN BER BRE CAM CES CHA DES
## 0.8500000 0.8583333 0.8076923 0.5916667 0.8071429 0.4083333 0.8000000
## GEL HAI HAN HOC HUN IMP INJ
## 0.5500000 0.3833333 0.6750000 0.8000000 0.8666667 0.1750000 0.8363636
## INW ITB ITP ITR JAF KAC KAG
## 0.7500000 0.6200000 0.8222222 0.8083333 -0.4500000 0.1833333 0.8083333
## KAN KAT KLP KUN LAM MAT OKI
## 0.8454545 0.4666667 0.6250000 0.6250000 0.8000000 0.6333333 0.8666667
## PAL QNC ROM SIC SON SSK SUF
## 0.7454545 0.8909091 0.7000000 0.4222222 0.1333333 0.4583333 0.6166667
## SUU TAI TRE UTS YUN
## 0.6500000 0.8428571 0.7083333 0.8750000 0.1444444
##
## $pop.score$`30`
## BEN BER BRE CAM CES CHA DES
## 0.8500000 0.8666667 0.7615385 0.5833333 0.8857143 0.2750000 0.8250000
## GEL HAI HAN HOC HUN IMP INJ
## 0.4500000 0.6666667 0.6250000 0.7142857 0.7250000 0.0750000 0.8636364
## INW ITB ITP ITR JAF KAC KAG
## 0.7000000 0.4600000 0.8444444 0.7166667 -0.0500000 0.5166667 0.8583333
## KAN KAT KLP KUN LAM MAT OKI
## 0.8181818 0.4166667 0.6250000 0.5750000 0.7000000 0.7166667 0.7833333
## PAL QNC ROM SIC SON SSK SUF
## 0.8454545 0.7454545 0.6250000 0.6222222 0.2666667 0.4500000 0.5333333
## SUU TAI TRE UTS YUN
## 0.7333333 0.6571429 0.7500000 0.8416667 0.2333333
##
## $pop.score$`35`
## BEN BER BRE CAM CES CHA DES
## 0.77500000 0.80000000 0.72307692 0.55833333 0.74285714 0.63333333 0.79375000
## GEL HAI HAN HOC HUN IMP INJ
## 0.45000000 0.69166667 0.52500000 0.74285714 0.80833333 0.45000000 0.70000000
## INW ITB ITP ITR JAF KAC KAG
## 0.67500000 0.40000000 0.77777778 0.65833333 0.35000000 0.20000000 0.76666667
## KAN KAT KLP KUN LAM MAT OKI
## 0.76363636 0.30000000 0.55000000 0.60000000 0.72222222 0.59166667 0.82500000
## PAL QNC ROM SIC SON SSK SUF
## 0.80000000 0.74545455 0.57500000 0.62222222 0.03333333 0.35000000 0.51666667
## SUU TAI TRE UTS YUN
## 0.63333333 0.65714286 0.72500000 0.84166667 0.33333333
##
## $pop.score$`40`
## BEN BER BRE CAM CES CHA DES GEL
## 0.6083333 0.8333333 0.7538462 0.5416667 0.7785714 0.6583333 0.8125000 0.3000000
## HAI HAN HOC HUN IMP INJ INW ITB
## 0.7500000 0.5250000 0.7285714 0.7750000 0.3000000 0.6818182 0.6500000 0.6600000
## ITP ITR JAF KAC KAG KAN KAT KLP
## 0.7222222 0.5750000 0.3500000 0.2666667 0.7500000 0.8545455 0.2333333 0.5750000
## KUN LAM MAT OKI PAL QNC ROM SIC
## 0.5250000 0.7000000 0.5583333 0.8083333 0.7545455 0.7909091 0.2000000 0.6333333
## SON SSK SUF SUU TAI TRE UTS YUN
## 0.4333333 0.3500000 0.4833333 0.5666667 0.6428571 0.7833333 0.7666667 0.4555556
##
## $pop.score$`45`
## BEN BER BRE CAM CES CHA DES GEL
## 0.6333333 0.8166667 0.7692308 0.5416667 0.7714286 0.6416667 0.7312500 0.3000000
## HAI HAN HOC HUN IMP INJ INW ITB
## 0.6500000 0.5250000 0.7000000 0.7333333 0.2000000 0.7818182 0.6250000 0.5800000
## ITP ITR JAF KAC KAG KAN KAT KLP
## 0.6888889 0.5833333 0.4000000 0.2833333 0.7250000 0.7454545 0.2833333 0.5000000
## KUN LAM MAT OKI PAL QNC ROM SIC
## 0.5250000 0.7000000 0.4583333 0.7916667 0.7727273 0.7636364 0.4000000 0.5777778
## SON SSK SUF SUU TAI TRE UTS YUN
## 0.4000000 0.3000000 0.4666667 0.5833333 0.6142857 0.7833333 0.8166667 0.3777778
##
## $pop.score$`50`
## BEN BER BRE CAM CES CHA DES GEL
## 0.5833333 0.7750000 0.7846154 0.4333333 0.8000000 0.7333333 0.6937500 0.3000000
## HAI HAN HOC HUN IMP INJ INW ITB
## 0.6500000 0.5000000 0.6714286 0.7750000 0.1250000 0.7000000 0.3000000 0.5600000
## ITP ITR JAF KAC KAG KAN KAT KLP
## 0.5555556 0.5583333 0.3000000 0.2166667 0.7166667 0.7818182 0.1166667 0.3750000
## KUN LAM MAT OKI PAL QNC ROM SIC
## 0.5000000 0.7222222 0.4916667 0.7500000 0.6181818 0.7727273 0.4000000 0.5777778
## SON SSK SUF SUU TAI TRE UTS YUN
## 0.4666667 0.1083333 0.3000000 0.6333333 0.6285714 0.6916667 0.7250000 0.4222222
##
## $pop.score$`55`
## BEN BER BRE CAM CES CHA DES GEL
## 0.6000000 0.7583333 0.6769231 0.4083333 0.7428571 0.7083333 0.7375000 0.3500000
## HAI HAN HOC HUN IMP INJ INW ITB
## 0.6500000 0.3250000 0.6142857 0.6666667 0.1750000 0.7363636 0.3500000 0.4200000
## ITP ITR JAF KAC KAG KAN KAT KLP
## 0.6222222 0.5083333 0.2500000 0.3166667 0.7500000 0.6000000 0.1666667 0.3750000
## KUN LAM MAT OKI PAL QNC ROM SIC
## 0.3750000 0.6444444 0.4500000 0.7250000 0.6272727 0.6363636 0.4750000 0.4777778
## SON SSK SUF SUU TAI TRE UTS YUN
## 0.5000000 0.1000000 0.3833333 0.5166667 0.7142857 0.6916667 0.7333333 0.4555556
##
## $pop.score$`60`
## BEN BER BRE CAM CES
## 5.083333e-01 7.083333e-01 7.230769e-01 3.500000e-01 6.571429e-01
## CHA DES GEL HAI HAN
## 6.583333e-01 6.312500e-01 2.500000e-01 6.083333e-01 4.000000e-01
## HOC HUN IMP INJ INW
## 5.571429e-01 7.250000e-01 7.500000e-02 6.909091e-01 4.750000e-01
## ITB ITP ITR JAF KAC
## 5.200000e-01 6.111111e-01 4.583333e-01 2.000000e-01 4.000000e-01
## KAG KAN KAT KLP KUN
## 7.500000e-01 5.818182e-01 1.666667e-01 4.250000e-01 3.750000e-01
## LAM MAT OKI PAL QNC
## 6.444444e-01 4.583333e-01 6.750000e-01 6.181818e-01 6.363636e-01
## ROM SIC SON SSK SUF
## 4.500000e-01 5.333333e-01 3.666667e-01 -1.665091e-17 4.333333e-01
## SUU TAI TRE UTS YUN
## 5.333333e-01 5.000000e-01 6.750000e-01 5.750000e-01 4.111111e-01
##
##
## $mean
## 1 5 10 15 20 25 30 35
## 0.3051345 0.5409070 0.5817042 0.6404075 0.6406431 0.6250357 0.6287852 0.6101916
## 40 45 50 55 60
## 0.6033986 0.5885236 0.5453468 0.5253546 0.5003971
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
##
## $pred$y
## [1] 0.3337460 0.3781713 0.4210422 0.4607936 0.4958603 0.5250879 0.5489646
## [8] 0.5683894 0.5842613 0.5974795 0.6087986 0.6183976 0.6263111 0.6325738
## [15] 0.6372203 0.6403146 0.6420375 0.6425993 0.6422102 0.6410803 0.6394157
## [22] 0.6374067 0.6352392 0.6330995 0.6311736 0.6295918 0.6282608 0.6270314
## [29] 0.6257544 0.6242807 0.6225021 0.6204748 0.6182960 0.6160631 0.6138732
## [36] 0.6117901 0.6097433 0.6076287 0.6053423 0.6027801 0.5998435 0.5964568
## [43] 0.5925494 0.5880511 0.5828914 0.5770513 0.5707169 0.5641257 0.5575151
## [50] 0.5511224 0.5451325 0.5395197 0.5342056 0.5291121 0.5241607 0.5192840
## [57] 0.5144585 0.5096713 0.5049098 0.5001612
##
##
## $best
## [1] 18
Run DAPC with object
Save it
saveRDS(
dapc_snp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "dapc_snp.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "dapc_snp.rds"
)
)
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,35:38,43,60,62:64)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
## Warning: Removed 679 rows containing missing values or values outside the scale range
## (`geom_point()`).
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
Plot using different discriminant functions PCs 1 & 2
# 1 and 2
scatter(
dapc_snp,
bg = "white",
scree.da = TRUE,
cex = 1,
pch = 20,
cex.lab = 0.1,
col = myCol,
xax = 1,
yax = 2
)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/italy_US/dapc_euro_global_r1_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25, 53:84)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
dev.off()
good.shapes = c(1:25, 53:84)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native_temporal.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Brescia Brescia 45.53373 10.204450 Europe BRE 1995
## 2 Cesena Cesena 44.15287 12.244265 Europe CES 1995
## 3 Desenzano Desenzano 45.46289 10.549140 Europe DES 1995
## 4 Bologna Bologna 44.48478 11.366584 Europe ITB 2017
## 5 Imperia Imperia 43.87159 8.003559 Europe IMP 2017
## 6 Puglia Puglia 41.12213 16.844107 Europe ITP 2016
## Region Subregion order order2 orderold
## 1 Italy (1995) West Europe 20 12 12
## 2 Italy (1995) West Europe 24 16 16
## 3 Italy (1995) West Europe 21 13 13
## 4 Italy (modern) West Europe 23 15 15
## 5 Italy (modern) West Europe 18 10 10
## 6 Italy (modern) West Europe 28 20 20
## [1] OKI OKI OKI OKI OKI OKI
## 40 Levels: BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ... YUN
Load the csv
countr <- read.csv(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "DAPC_countries_italy_US.csv"
))
df <- as.data.frame(countr)
head(df)
## pop country
## 1 OKI Japan
## 2 OKI Japan
## 3 OKI Japan
## 4 OKI Japan
## 5 OKI Japan
## 6 OKI Japan
## [1] Japan Japan Japan Japan
## [5] Japan Japan Japan Japan
## [9] Japan Japan Japan Japan
## [13] China China China China
## [17] China China China China
## [21] China China China China
## [25] China China China China
## [29] China China China China
## [33] China Nepal Nepal Nepal
## [37] Nepal Taiwan Taiwan Taiwan
## [41] Taiwan Taiwan Taiwan Taiwan
## [45] China China China China
## [49] China China China China
## [53] China China China China
## [57] Vietnam Vietnam Vietnam Vietnam
## [61] Trentino Trentino Trentino Trentino
## [65] Trentino Trentino Trentino Trentino
## [69] Trentino Trentino Trentino Trentino
## [73] Malaysia Malaysia Malaysia Malaysia
## [77] Sicilia Sicilia Sicilia Sicilia
## [81] Sicilia Sicilia Sicilia Sicilia
## [85] Sicilia Vietnam Vietnam Vietnam
## [89] Vietnam Vietnam Vietnam Vietnam
## [93] Vietnam Vietnam Vietnam Vietnam
## [97] Vietnam Vietnam Vietnam Vietnam
## [101] Vietnam Vietnam Vietnam Thailand
## [105] Thailand Thailand Thailand Thailand
## [109] Thailand Thailand Thailand Thailand
## [113] Thailand Thailand Thailand Thailand
## [117] Thailand Thailand Thailand Thailand
## [121] Thailand Thailand Thailand Thailand
## [125] Thailand Brescia Brescia Brescia
## [129] Desenzano Desenzano Cesena Cesena
## [133] Japan Japan Japan Japan
## [137] Japan Japan Japan Japan
## [141] Japan Japan Japan Japan
## [145] Japan Japan Japan Japan
## [149] Japan Japan Japan Japan
## [153] Japan Japan Japan Brescia
## [157] Brescia Brescia Brescia Brescia
## [161] Brescia Brescia Brescia Brescia
## [165] Brescia Desenzano Desenzano Desenzano
## [169] Desenzano Desenzano Desenzano Desenzano
## [173] Cesena Cesena Cesena Cesena
## [177] Cesena Cesena Cesena Imperia
## [181] Imperia Imperia Imperia Rome (Sapienza)
## [185] Rome (Sapienza) Rome (Sapienza) Rome (Sapienza) Cambodia
## [189] Cambodia Cambodia Cambodia Cambodia
## [193] Cambodia Japan Cambodia Cambodia
## [197] Cambodia Cambodia Cambodia Cambodia
## [201] Cesena Cesena Cesena Japan
## [205] Cesena Cesena Desenzano Desenzano
## [209] Desenzano Japan Japan Desenzano
## [213] Desenzano Desenzano Desenzano Japan
## [217] Japan Japan Japan Japan
## [221] Japan Japan Japan India
## [225] India India India India
## [229] India India India India
## [233] India India India Thailand
## [237] Thailand Thailand Thailand Thailand
## [241] Thailand Thailand Thailand Thailand
## [245] USA USA USA USA
## [249] USA USA USA USA
## [253] USA USA USA USA
## [257] USA USA USA USA
## [261] USA USA USA USA
## [265] USA USA USA Bhutan
## [269] Bhutan Nepal Nepal Sri Lanka
## [273] Sri Lanka Thailand Thailand Thailand
## [277] Thailand Thailand Thailand Thailand
## [281] Thailand Thailand Thailand Thailand
## [285] Indonesia Indonesia Indonesia Indonesia
## [289] Indonesia Indonesia Indonesia Indonesia
## [293] Indonesia Indonesia Indonesia Indonesia
## [297] Maldives Maldives Maldives Maldives
## [301] Bologna Bologna Bologna Bologna
## [305] Bologna Puglia Puglia Puglia
## [309] Puglia Puglia Puglia Puglia
## [313] Puglia Puglia Rome (Trappola) Rome (Trappola)
## [317] Rome (Trappola) Rome (Trappola) Rome (Trappola) Rome (Trappola)
## [321] Rome (Trappola) Rome (Trappola) Rome (Trappola) Rome (Trappola)
## [325] Rome (Trappola) Rome (Trappola) Indonesia Indonesia
## [329] Indonesia Indonesia Indonesia Indonesia
## [333] Indonesia Indonesia Indonesia Indonesia
## [337] Indonesia Indonesia Indonesia Indonesia
## [341] Indonesia Malaysia Malaysia Malaysia
## [345] Malaysia Malaysia Malaysia Malaysia
## [349] Malaysia Malaysia Malaysia Malaysia
## [353] Malaysia
## 24 Levels: Bhutan Bologna Brescia Cambodia Cesena China Desenzano ... Vietnam
Save the genind object
saveRDS(snp2, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "snp_country.rds"
))
Load the genind object
snp_country <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "italy_US", "snp_country.rds"
))
Scale
## NULL
## /// GENIND OBJECT /////////
##
## // 5 individuals; 3 loci; 5 alleles; size: 11.5 Kb
##
## // Basic content
## @tab: 5 x 5 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 1-2)
## @loc.fac: locus factor for the 5 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: .local(x = x, i = i, j = j, drop = drop)
##
## // Optional content
## @pop: population of each individual (group size range: 5-5)
## @strata: a data frame with 4 columns ( sex, phenotype, pat, mat )
## @other: a list containing: sex phenotype pat mat
grp <- find.clusters(snp_country, max.n.clust=10)
#retained 350
#Choose the number of clusters (>=2): 4
Save the genind object
saveRDS(grp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "grp_country1.rds"
))
Load the genind object
grp <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "grp_country1.rds"
))
Save the genind object
saveRDS(dapc_country_1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_country_1.rds"
))
Load the genind object
dapc_country_1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_country_1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc_country_1, n.pca=1:ncol(dapc_country_1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=10) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## 1 2 3 4
## 0.0000000 0.8078431 0.3684211 0.5814815
##
## $pop.score$`50`
## 1 2 3 4
## 0.7034483 0.5274510 0.4657895 0.4703704
##
## $pop.score$`100`
## 1 2 3 4
## 0.4551724 0.3862745 0.3938596 0.3962963
##
## $pop.score$`150`
## 1 2 3 4
## 0.2310345 0.2774510 0.2640351 0.2361111
##
## $pop.score$`200`
## 1 2 3 4
## 0.1758621 0.1833333 0.1675439 0.1907407
##
## $pop.score$`250`
## 1 2 3 4
## 0.1379310 0.1294118 0.1254386 0.1342593
##
## $pop.score$`300`
## 1 2 3 4
## 0.04482759 0.06274510 0.06491228 0.04814815
##
##
## $mean
## 1 50 100 150 200 250 300
## 0.43943642 0.54176478 0.40790072 0.25215792 0.17937000 0.13176016 0.05515828
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300
##
## $pred$y
## [1] 0.43943642 0.44272601 0.44601287 0.44929395 0.45256626 0.45582676
## [7] 0.45907244 0.46230027 0.46550724 0.46869033 0.47184652 0.47497279
## [13] 0.47806612 0.48112348 0.48414187 0.48711825 0.49004961 0.49293294
## [19] 0.49576520 0.49854339 0.50126448 0.50392545 0.50652328 0.50905496
## [25] 0.51151746 0.51390776 0.51622284 0.51845969 0.52061528 0.52268660
## [31] 0.52467062 0.52656433 0.52836470 0.53006872 0.53167336 0.53317561
## [37] 0.53457245 0.53586085 0.53703780 0.53810028 0.53904527 0.53986975
## [43] 0.54057069 0.54114508 0.54158991 0.54190214 0.54207876 0.54211676
## [49] 0.54201310 0.54176478 0.54136980 0.54083034 0.54014961 0.53933082
## [55] 0.53837717 0.53729187 0.53607812 0.53473915 0.53327815 0.53169833
## [61] 0.53000289 0.52819506 0.52627803 0.52425502 0.52212923 0.51990386
## [67] 0.51758214 0.51516725 0.51266242 0.51007085 0.50739575 0.50464033
## [73] 0.50180779 0.49890134 0.49592419 0.49287954 0.48977062 0.48660061
## [79] 0.48337274 0.48009020 0.47675621 0.47337398 0.46994671 0.46647761
## [85] 0.46296988 0.45942675 0.45585140 0.45224706 0.44861693 0.44496421
## [91] 0.44129212 0.43760386 0.43390264 0.43019167 0.42647416 0.42275331
## [97] 0.41903234 0.41531444 0.41160283 0.40790072 0.40421088 0.40053438
## [103] 0.39687184 0.39322391 0.38959122 0.38597440 0.38237408 0.37879089
## [109] 0.37522548 0.37167847 0.36815049 0.36464218 0.36115418 0.35768711
## [115] 0.35424161 0.35081831 0.34741785 0.34404086 0.34068797 0.33735981
## [121] 0.33405702 0.33078023 0.32753008 0.32430720 0.32111222 0.31794577
## [127] 0.31480849 0.31170101 0.30862397 0.30557800 0.30256372 0.29958178
## [133] 0.29663281 0.29371744 0.29083630 0.28799003 0.28517926 0.28240462
## [139] 0.27966675 0.27696628 0.27430384 0.27168008 0.26909561 0.26655107
## [145] 0.26404710 0.26158433 0.25916339 0.25678492 0.25444955 0.25215792
## [151] 0.24991042 0.24770658 0.24554569 0.24342704 0.24134992 0.23931362
## [157] 0.23731744 0.23536066 0.23344257 0.23156247 0.22971965 0.22791340
## [163] 0.22614300 0.22440776 0.22270696 0.22103990 0.21940586 0.21780413
## [169] 0.21623401 0.21469479 0.21318576 0.21170621 0.21025543 0.20883272
## [175] 0.20743736 0.20606864 0.20472586 0.20340831 0.20211527 0.20084605
## [181] 0.19959992 0.19837619 0.19717414 0.19599307 0.19483226 0.19369101
## [187] 0.19256861 0.19146434 0.19037751 0.18930739 0.18825329 0.18721450
## [193] 0.18619030 0.18517998 0.18418284 0.18319817 0.18222526 0.18126340
## [199] 0.18031189 0.17937000 0.17843706 0.17751248 0.17659568 0.17568608
## [205] 0.17478313 0.17388625 0.17299486 0.17210840 0.17122629 0.17034797
## [211] 0.16947286 0.16860039 0.16772999 0.16686109 0.16599311 0.16512550
## [217] 0.16425766 0.16338904 0.16251907 0.16164716 0.16077276 0.15989529
## [223] 0.15901417 0.15812884 0.15723872 0.15634325 0.15544186 0.15453396
## [229] 0.15361900 0.15269639 0.15176557 0.15082598 0.14987702 0.14891814
## [235] 0.14794877 0.14696833 0.14597625 0.14497195 0.14395488 0.14292446
## [241] 0.14188011 0.14082126 0.13974736 0.13865781 0.13755205 0.13642952
## [247] 0.13528964 0.13413183 0.13295553 0.13176016 0.13054532 0.12931124
## [253] 0.12805831 0.12678693 0.12549748 0.12419037 0.12286598 0.12152471
## [259] 0.12016695 0.11879309 0.11740354 0.11599868 0.11457890 0.11314460
## [265] 0.11169618 0.11023402 0.10875852 0.10727008 0.10576908 0.10425592
## [271] 0.10273099 0.10119470 0.09964742 0.09808955 0.09652150 0.09494364
## [277] 0.09335638 0.09176010 0.09015521 0.08854209 0.08692114 0.08529275
## [283] 0.08365732 0.08201523 0.08036689 0.07871268 0.07705300 0.07538824
## [289] 0.07371880 0.07204506 0.07036743 0.06868630 0.06700205 0.06531509
## [295] 0.06362580 0.06193458 0.06024183 0.05854793 0.05685328 0.05515828
##
##
## $best
## [1] 48
Run DAPC with object
Save it
saveRDS(
dapc_snp_country, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_snp_country.rds"
)
)
To load it
dapc_snp_country <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "dapc_snp_country.rds"
)
)
Plot with new colors - by region
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/italy_US/dapc_snp_country2_euro_global_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
myCol2 <- c("#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C", "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,43,60,62:64)
myCol2 <- c("#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C", "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
Plot with new colors - by region
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/italy_US/dapc_snp_country2_euro_global_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,43,60,62:64)
myCol2 <- c("#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C", "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,43,60,62:64)
myCol2 <- c("#146c45", "#a113b2", "magenta", "#2524f9", "magenta", "#c41A1C", "magenta", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#146c45", "#c41A1C", "#2524f9", "#a113b2", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
Set 3 Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_albania_croatia_greece_US.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_albania_croatia_greece_US \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_albania_croatia_greece_US.log
22642 variants loaded from .bim file. –keep-fam: 314 people remaining. Total genotyping rate in remaining samples is 0.968879. 22642 variants and 314 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_albania_croatia_greece_US \
--recodeA \
--out output/dapc/MAF_1/dapc_albania_croatia_greece_US \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_albania_croatia_greece_US.log
22642 variants loaded from .bim file. 22642 variants and 314 people pass filters and QC.
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/MAF_1/dapc_albania_croatia_greece_US.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## [1] 314
## [1] 22642
## [1] 36
## [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
## [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
## [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
## [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
## [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
## [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
## [61] "1214" "1215" "1216" "1217" "1238" "1239" "1240" "1241" "1242" "1243"
## [71] "1244" "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254"
## [81] "1255" "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264"
## [91] "1265" "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276"
## [101] "1282" "1283" "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331"
## [111] "1332" "1333" "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377"
## [121] "1378" "1379" "1380" "1381" "1382" "1383" "1384" "193" "194" "195"
## [131] "196" "2174" "2175" "2176" "2177" "2178" "2179" "217" "2180" "2181"
## [141] "2182" "2183" "2184" "2185" "218" "219" "2202" "220" "221" "222"
## [151] "223" "224" "225" "226" "227" "230" "255" "256" "257" "258"
## [161] "261" "262" "263" "264" "265" "266" "267" "268" "269" "270"
## [171] "271" "272" "273" "275" "276" "277" "278" "294" "295" "296"
## [181] "297" "298" "299" "301" "302" "303" "304" "305" "435" "436"
## [191] "437" "438" "439" "440" "441" "442" "443" "444" "445" "446"
## [201] "602" "603" "604" "607" "609" "610" "623" "624" "625" "626"
## [211] "627" "628" "629" "630" "631" "632" "633" "666" "669" "670"
## [221] "671" "672" "673" "674" "675" "676" "677" "678" "679" "680"
## [231] "681" "682" "683" "711" "712" "713" "714" "715" "716" "717"
## [241] "718" "719" "720" "721" "722" "723" "724" "725" "726" "727"
## [251] "728" "729" "730" "731" "732" "733" "735" "736" "737" "741"
## [261] "742" "743" "744" "745" "746" "801" "802" "803" "804" "805"
## [271] "806" "807" "808" "809" "810" "916" "917" "918" "919" "920"
## [281] "921" "922" "923" "924" "925" "926" "927" "964" "965" "966"
## [291] "967" "972" "973" "975" "976" "977" "978" "979" "980" "981"
## [301] "982" "983" "984" "985" "986" "987" "988" "989" "990" "991"
## [311] "992" "993" "994" "995"
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save it
To load it
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp3, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 40, xval.plot = TRUE)
## $`Cross-Validation Results`
## n.pca success
## 1 20 0.7121733
## 2 20 0.7586347
## 3 20 0.6918624
## 4 20 0.7410415
## 5 20 0.7175208
## 6 20 0.7450930
## 7 20 0.7247377
## 8 20 0.7209678
## 9 20 0.7653311
## 10 20 0.7004817
## 11 20 0.6883289
## 12 20 0.7660484
## 13 20 0.7263472
## 14 20 0.6799508
## 15 20 0.7295107
## 16 20 0.6956206
## 17 20 0.7390754
## 18 20 0.7329402
## 19 20 0.7433662
## 20 20 0.7288815
## 21 20 0.7191869
## 22 20 0.7221908
## 23 20 0.7323017
## 24 20 0.6526194
## 25 20 0.7740501
## 26 20 0.6995899
## 27 20 0.7732676
## 28 20 0.7341519
## 29 20 0.7722779
## 30 20 0.6951946
## 31 20 0.7453346
## 32 20 0.7078346
## 33 20 0.6717171
## 34 20 0.7100867
## 35 20 0.6456569
## 36 20 0.7573269
## 37 20 0.7407391
## 38 20 0.7231782
## 39 20 0.7625194
## 40 20 0.7108108
## 41 40 0.7872334
## 42 40 0.8047725
## 43 40 0.7506963
## 44 40 0.7829471
## 45 40 0.7737122
## 46 40 0.7607249
## 47 40 0.7670157
## 48 40 0.7880247
## 49 40 0.7763592
## 50 40 0.7683322
## 51 40 0.7126060
## 52 40 0.7412828
## 53 40 0.7988809
## 54 40 0.7512279
## 55 40 0.7894947
## 56 40 0.7298717
## 57 40 0.7756443
## 58 40 0.7635027
## 59 40 0.7412984
## 60 40 0.7625959
## 61 40 0.7571691
## 62 40 0.7736241
## 63 40 0.7422269
## 64 40 0.7367106
## 65 40 0.6928590
## 66 40 0.7687290
## 67 40 0.7787173
## 68 40 0.7284152
## 69 40 0.7464795
## 70 40 0.7623122
## 71 40 0.7983358
## 72 40 0.7745735
## 73 40 0.7524213
## 74 40 0.7544142
## 75 40 0.7568142
## 76 40 0.7862862
## 77 40 0.7027281
## 78 40 0.7609155
## 79 40 0.7411608
## 80 40 0.7065247
## 81 60 0.7117034
## 82 60 0.8206816
## 83 60 0.7835784
## 84 60 0.6940216
## 85 60 0.7856909
## 86 60 0.7854925
## 87 60 0.7762663
## 88 60 0.7500422
## 89 60 0.7215764
## 90 60 0.7938463
## 91 60 0.7446195
## 92 60 0.8283497
## 93 60 0.8073296
## 94 60 0.7742530
## 95 60 0.6899884
## 96 60 0.8169860
## 97 60 0.8125671
## 98 60 0.7964840
## 99 60 0.7750554
## 100 60 0.7800412
## 101 60 0.7627904
## 102 60 0.8183007
## 103 60 0.7678630
## 104 60 0.7331934
## 105 60 0.7968416
## 106 60 0.7832020
## 107 60 0.7113562
## 108 60 0.7777544
## 109 60 0.7966737
## 110 60 0.7598681
## 111 60 0.7681694
## 112 60 0.7930614
## 113 60 0.7706816
## 114 60 0.7649849
## 115 60 0.7740342
## 116 60 0.7145804
## 117 60 0.8203975
## 118 60 0.7679243
## 119 60 0.7829739
## 120 60 0.7852241
## 121 80 0.7716737
## 122 80 0.7270440
## 123 80 0.7882207
## 124 80 0.7269758
## 125 80 0.7275444
## 126 80 0.7418149
## 127 80 0.7753239
## 128 80 0.7122958
## 129 80 0.7425335
## 130 80 0.7573267
## 131 80 0.7649860
## 132 80 0.7173098
## 133 80 0.8571078
## 134 80 0.7978350
## 135 80 0.7109185
## 136 80 0.7289714
## 137 80 0.7410685
## 138 80 0.7631507
## 139 80 0.7340155
## 140 80 0.7674213
## 141 80 0.7318715
## 142 80 0.6978466
## 143 80 0.7661969
## 144 80 0.8400444
## 145 80 0.7481437
## 146 80 0.7455644
## 147 80 0.7636220
## 148 80 0.8302638
## 149 80 0.7188299
## 150 80 0.7942927
## 151 80 0.7202213
## 152 80 0.7635958
## 153 80 0.7534097
## 154 80 0.7398664
## 155 80 0.7243697
## 156 80 0.7272100
## 157 80 0.8030229
## 158 80 0.7645396
## 159 80 0.7650083
## 160 80 0.7839140
## 161 100 0.7428077
## 162 100 0.7369927
## 163 100 0.7509891
## 164 100 0.6631589
## 165 100 0.7152859
## 166 100 0.7313603
## 167 100 0.7140929
## 168 100 0.7083683
## 169 100 0.7190892
## 170 100 0.7956437
## 171 100 0.7561975
## 172 100 0.7708699
## 173 100 0.7579423
## 174 100 0.7074872
## 175 100 0.7812829
## 176 100 0.6964257
## 177 100 0.7625257
## 178 100 0.7078023
## 179 100 0.7369384
## 180 100 0.7309198
## 181 100 0.7822798
## 182 100 0.7727328
## 183 100 0.7325251
## 184 100 0.6587404
## 185 100 0.7311424
## 186 100 0.7127013
## 187 100 0.7770396
## 188 100 0.7460872
## 189 100 0.7988110
## 190 100 0.7309351
## 191 100 0.7485477
## 192 100 0.7217729
## 193 100 0.7802229
## 194 100 0.6795781
## 195 100 0.7243270
## 196 100 0.7139239
## 197 100 0.7027385
## 198 100 0.7604313
## 199 100 0.6487817
## 200 100 0.7528653
## 201 120 0.6535000
## 202 120 0.7735360
## 203 120 0.7083683
## 204 120 0.7543910
## 205 120 0.7547656
## 206 120 0.7105639
## 207 120 0.6470180
## 208 120 0.7002172
## 209 120 0.7606140
## 210 120 0.6569351
## 211 120 0.7610833
## 212 120 0.8014082
## 213 120 0.7396046
## 214 120 0.7459742
## 215 120 0.7159671
## 216 120 0.6845434
## 217 120 0.6523006
## 218 120 0.7464957
## 219 120 0.7606612
## 220 120 0.7586363
## 221 120 0.8278916
## 222 120 0.7433367
## 223 120 0.7065314
## 224 120 0.6771465
## 225 120 0.7456203
## 226 120 0.8095377
## 227 120 0.7199033
## 228 120 0.6962063
## 229 120 0.7338055
## 230 120 0.7225392
## 231 120 0.7245345
## 232 120 0.7657571
## 233 120 0.7756660
## 234 120 0.7656546
## 235 120 0.7025064
## 236 120 0.7486481
## 237 120 0.7329994
## 238 120 0.6776547
## 239 120 0.7169407
## 240 120 0.7792343
## 241 140 0.5953957
## 242 140 0.6932313
## 243 140 0.5946500
## 244 140 0.5992452
## 245 140 0.6186539
## 246 140 0.6145183
## 247 140 0.6839249
## 248 140 0.6430357
## 249 140 0.6266671
## 250 140 0.6692819
## 251 140 0.6345279
## 252 140 0.6683775
## 253 140 0.6403557
## 254 140 0.6472984
## 255 140 0.7664450
## 256 140 0.6961100
## 257 140 0.6697948
## 258 140 0.6627671
## 259 140 0.6674179
## 260 140 0.6294109
## 261 140 0.6401575
## 262 140 0.6056796
## 263 140 0.7561352
## 264 140 0.5294660
## 265 140 0.6332691
## 266 140 0.6751694
## 267 140 0.6863031
## 268 140 0.6639830
## 269 140 0.5989413
## 270 140 0.6473471
## 271 140 0.6995742
## 272 140 0.6961458
## 273 140 0.6987286
## 274 140 0.5928510
## 275 140 0.6832247
## 276 140 0.5680849
## 277 140 0.6930560
## 278 140 0.6518435
## 279 140 0.6510469
## 280 140 0.6939725
## 281 160 0.5259826
## 282 160 0.5273388
## 283 160 0.4974642
## 284 160 0.5130137
## 285 160 0.6147667
## 286 160 0.5140835
## 287 160 0.5214828
## 288 160 0.5593421
## 289 160 0.5502144
## 290 160 0.5089491
## 291 160 0.5217682
## 292 160 0.5915813
## 293 160 0.5858521
## 294 160 0.5535258
## 295 160 0.4531839
## 296 160 0.5319293
## 297 160 0.5298387
## 298 160 0.5638113
## 299 160 0.4681568
## 300 160 0.5030138
## 301 160 0.6223696
## 302 160 0.5675059
## 303 160 0.5906784
## 304 160 0.5598905
## 305 160 0.5190702
## 306 160 0.5134123
## 307 160 0.5304884
## 308 160 0.5295380
## 309 160 0.5107148
## 310 160 0.5034419
## 311 160 0.5927687
## 312 160 0.5803295
## 313 160 0.5392707
## 314 160 0.5082775
## 315 160 0.5629371
## 316 160 0.4415218
## 317 160 0.5358065
## 318 160 0.4849450
## 319 160 0.5568267
## 320 160 0.5379564
##
## $`Median and Confidence Interval for Random Chance`
## 2.5% 50% 97.5%
## 0.02446939 0.04021864 0.06202084
##
## $`Mean Successful Assignment by Number of PCs of PCA`
## 20 40 60 80 100 120 140 160
## 0.7239487 0.7586910 0.7724612 0.7558843 0.7340591 0.7314674 0.6521522 0.5355762
##
## $`Number of PCs Achieving Highest Mean Success`
## [1] "60"
##
## $`Root Mean Squared Error by Number of PCs of PCA`
## 20 40 60 80 100 120 140 160
## 0.2778009 0.2427225 0.2301483 0.2466599 0.2683058 0.2718915 0.3509615 0.4661233
##
## $`Number of PCs Achieving Lowest MSE`
## [1] "60"
##
## $DAPC
## #################################################
## # Discriminant Analysis of Principal Components #
## #################################################
## class: dapc
## $call: dapc.data.frame(x = as.data.frame(x), grp = ..1, n.pca = ..2,
## n.da = ..3)
##
## $n.pca: 60 first PCs of PCA used
## $n.da: 23 discriminant functions saved
## $var (proportion of conserved variance): 0.408
##
## $eig (eigenvalues): 2269 1325 823.1 592.4 450 ...
##
## vector length content
## 1 $eig 23 eigenvalues
## 2 $grp 353 prior group assignment
## 3 $prior 24 prior group probabilities
## 4 $assign 353 posterior group assignment
## 5 $pca.cent 45280 centring vector of PCA
## 6 $pca.norm 45280 scaling vector of PCA
## 7 $pca.eig 352 eigenvalues of PCA
##
## data.frame nrow ncol content
## 1 $tab 353 60 retained PCs of PCA
## 2 $means 24 60 group means
## 3 $loadings 60 23 loadings of variables
## 4 $ind.coord 353 23 coordinates of individuals (principal components)
## 5 $grp.coord 24 23 coordinates of groups
## 6 $posterior 353 24 posterior membership probabilities
## 7 $pca.loadings 45280 60 PCA loadings of original variables
## 8 $var.contr 45280 23 contribution of original variables
$n.pca: 60 first PCs of PCA used $n.da: 35 discriminant functions saved $var (proportion of conserved variance): 0.408
Run dapc using these #s from CV
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## ALD ALV BEN BER CAM CHA
## 0.49000000 0.24583333 0.88750000 0.65000000 0.37500000 0.25833333
## CRO GEL GRA GRC HAI HAN
## 0.46250000 0.00000000 0.78181818 -0.01000000 0.50416667 1.00000000
## HOC HUN INJ INW JAF KAC
## 0.28571429 0.71250000 0.86818182 1.00000000 0.00000000 0.00000000
## KAG KAN KAT KLP KUN LAM
## 0.57083333 0.52272727 0.00000000 0.00000000 0.00000000 0.00000000
## MAT OKI PAL QNC SON SSK
## 0.49583333 0.62916667 0.54545455 0.07272727 0.00000000 0.51250000
## SUF SUU TAI TIR UTS YUN
## 0.00000000 1.00000000 0.00000000 0.00000000 0.62083333 0.00000000
##
## $pop.score$`5`
## ALD ALV BEN BER CAM CHA
## 0.65000000 0.46666667 0.88333333 0.57500000 0.54583333 0.30000000
## CRO GEL GRA GRC HAI HAN
## 0.57916667 -0.07500000 0.94090909 0.84500000 0.39166667 0.90000000
## HOC HUN INJ INW JAF KAC
## 0.23571429 0.91666667 0.94545455 0.96250000 -0.12500000 -0.04166667
## KAG KAN KAT KLP KUN LAM
## 0.75000000 0.70454545 -0.05833333 -0.07500000 0.96250000 -0.02777778
## MAT OKI PAL QNC SON SSK
## 0.70833333 0.79583333 0.89545455 0.95909091 -0.05000000 0.67500000
## SUF SUU TAI TIR UTS YUN
## 0.30833333 0.90833333 0.91428571 -0.05000000 0.96250000 -0.02222222
##
## $pop.score$`10`
## ALD ALV BEN BER CAM CHA CRO
## 0.7350000 0.4625000 0.8583333 0.9208333 0.6625000 0.6333333 0.7291667
## GEL GRA GRC HAI HAN HOC HUN
## 0.7250000 0.8454545 0.8400000 0.5416667 0.8250000 0.6071429 0.8291667
## INJ INW JAF KAC KAG KAN KAT
## 0.9272727 0.7625000 -0.1750000 -0.0750000 0.8916667 0.7318182 -0.1083333
## KLP KUN LAM MAT OKI PAL QNC
## 0.9125000 0.8500000 0.3722222 0.7291667 0.9166667 0.9272727 0.7954545
## SON SSK SUF SUU TAI TIR UTS
## -0.2000000 0.6500000 0.2833333 0.9166667 0.9000000 0.4125000 0.9208333
## YUN
## -0.1111111
##
## $pop.score$`15`
## ALD ALV BEN BER CAM CHA
## 0.65000000 0.42916667 0.83750000 0.92916667 0.63333333 0.52916667
## CRO GEL GRA GRC HAI HAN
## 0.78750000 0.60000000 0.89545455 0.82500000 0.52083333 0.81250000
## HOC HUN INJ INW JAF KAC
## 0.83571429 0.90000000 0.90454545 0.75000000 -0.32500000 -0.08333333
## KAG KAN KAT KLP KUN LAM
## 0.89166667 0.90909091 0.55000000 0.85000000 0.82500000 0.55555556
## MAT OKI PAL QNC SON SSK
## 0.73333333 0.92500000 0.95000000 0.90000000 0.20000000 0.44583333
## SUF SUU TAI TIR UTS YUN
## 0.71666667 0.77500000 0.88571429 0.31250000 0.90833333 0.09444444
##
## $pop.score$`20`
## ALD ALV BEN BER CAM CHA
## 0.67000000 0.48333333 0.84583333 0.89166667 0.64166667 0.58333333
## CRO GEL GRA GRC HAI HAN
## 0.81250000 0.47500000 0.88181818 0.74000000 0.73333333 0.63750000
## HOC HUN INJ INW JAF KAC
## 0.85000000 0.86250000 0.83636364 0.77500000 -0.42500000 0.13333333
## KAG KAN KAT KLP KUN LAM
## 0.87500000 0.85000000 0.48333333 0.76250000 0.76250000 0.72222222
## MAT OKI PAL QNC SON SSK
## 0.73333333 0.87500000 0.85454545 0.86363636 0.40000000 0.45416667
## SUF SUU TAI TIR UTS YUN
## 0.62500000 0.84166667 0.87857143 0.30000000 0.85833333 0.08888889
##
## $pop.score$`25`
## ALD ALV BEN BER CAM CHA
## 0.73000000 0.58750000 0.82083333 0.85416667 0.59166667 0.67500000
## CRO GEL GRA GRC HAI HAN
## 0.77083333 0.40000000 0.80454545 0.75500000 0.65833333 0.75000000
## HOC HUN INJ INW JAF KAC
## 0.80714286 0.81250000 0.85000000 0.73750000 -0.05000000 0.40000000
## KAG KAN KAT KLP KUN LAM
## 0.87083333 0.77727273 0.47500000 0.75000000 0.65000000 0.80555556
## MAT OKI PAL QNC SON SSK
## 0.77500000 0.85000000 0.80454545 0.85454545 0.18333333 0.43750000
## SUF SUU TAI TIR UTS YUN
## 0.60833333 0.78333333 0.75714286 0.47500000 0.85416667 0.07777778
##
## $pop.score$`30`
## ALD ALV BEN BER CAM CHA CRO GEL
## 0.6800000 0.6625000 0.8041667 0.7666667 0.5666667 0.6500000 0.8708333 0.3750000
## GRA GRC HAI HAN HOC HUN INJ INW
## 0.8272727 0.6750000 0.7875000 0.6125000 0.7571429 0.8333333 0.8363636 0.6750000
## JAF KAC KAG KAN KAT KLP KUN LAM
## 0.4250000 0.2416667 0.8041667 0.7909091 0.4250000 0.6500000 0.5500000 0.7944444
## MAT OKI PAL QNC SON SSK SUF SUU
## 0.6041667 0.8583333 0.8090909 0.8590909 0.2333333 0.2416667 0.5333333 0.6750000
## TAI TIR UTS YUN
## 0.7642857 0.6125000 0.8125000 0.5722222
##
## $pop.score$`35`
## ALD ALV BEN BER CAM CHA CRO GEL
## 0.6600000 0.5291667 0.6541667 0.7958333 0.5083333 0.7083333 0.8166667 0.3250000
## GRA GRC HAI HAN HOC HUN INJ INW
## 0.7409091 0.7200000 0.7375000 0.5625000 0.7214286 0.8458333 0.8136364 0.5875000
## JAF KAC KAG KAN KAT KLP KUN LAM
## 0.4250000 0.3666667 0.7833333 0.8000000 0.3333333 0.6375000 0.5625000 0.7722222
## MAT OKI PAL QNC SON SSK SUF SUU
## 0.6666667 0.8250000 0.7590909 0.7727273 0.1666667 0.2458333 0.4416667 0.6416667
## TAI TIR UTS YUN
## 0.7142857 0.5500000 0.7500000 0.5944444
##
## $pop.score$`40`
## ALD ALV BEN BER CAM CHA CRO GEL
## 0.7200000 0.5375000 0.5875000 0.7958333 0.5208333 0.8041667 0.7291667 0.3250000
## GRA GRC HAI HAN HOC HUN INJ INW
## 0.7590909 0.6650000 0.6958333 0.5250000 0.6928571 0.7250000 0.7454545 0.5500000
## JAF KAC KAG KAN KAT KLP KUN LAM
## 0.4500000 0.4000000 0.7791667 0.7681818 0.3000000 0.5500000 0.5375000 0.7166667
## MAT OKI PAL QNC SON SSK SUF SUU
## 0.5500000 0.8000000 0.8090909 0.7136364 0.1500000 0.0500000 0.4000000 0.6416667
## TAI TIR UTS YUN
## 0.6285714 0.5125000 0.7791667 0.4944444
##
## $pop.score$`45`
## ALD ALV BEN BER CAM CHA CRO GEL
## 0.6150000 0.5416667 0.6166667 0.8208333 0.5000000 0.7541667 0.7458333 0.2500000
## GRA GRC HAI HAN HOC HUN INJ INW
## 0.6727273 0.5950000 0.7625000 0.5125000 0.6214286 0.7625000 0.7454545 0.4875000
## JAF KAC KAG KAN KAT KLP KUN LAM
## 0.3250000 0.4000000 0.7250000 0.7181818 0.3000000 0.5125000 0.4125000 0.7333333
## MAT OKI PAL QNC SON SSK SUF SUU
## 0.5125000 0.7666667 0.7363636 0.7590909 0.1000000 0.1000000 0.4500000 0.6000000
## TAI TIR UTS YUN
## 0.6285714 0.5375000 0.7458333 0.4722222
##
## $pop.score$`50`
## ALD ALV BEN BER CAM CHA CRO
## 0.54500000 0.57083333 0.65833333 0.75000000 0.48333333 0.70416667 0.65416667
## GEL GRA GRC HAI HAN HOC HUN
## 0.27500000 0.67727273 0.54500000 0.77083333 0.48750000 0.60000000 0.73333333
## INJ INW JAF KAC KAG KAN KAT
## 0.74090909 0.45000000 0.30000000 0.19166667 0.74166667 0.71818182 0.25000000
## KLP KUN LAM MAT OKI PAL QNC
## 0.36250000 0.48750000 0.61111111 0.52916667 0.73333333 0.70000000 0.76363636
## SON SSK SUF SUU TAI TIR UTS
## 0.36666667 0.02916667 0.35833333 0.58333333 0.62857143 0.38750000 0.69583333
## YUN
## 0.43333333
##
## $pop.score$`55`
## ALD ALV BEN BER CAM CHA
## 0.545000000 0.520833333 0.587500000 0.762500000 0.437500000 0.679166667
## CRO GEL GRA GRC HAI HAN
## 0.625000000 0.325000000 0.686363636 0.540000000 0.687500000 0.337500000
## HOC HUN INJ INW JAF KAC
## 0.528571429 0.733333333 0.663636364 0.375000000 0.200000000 0.350000000
## KAG KAN KAT KLP KUN LAM
## 0.725000000 0.663636364 0.158333333 0.387500000 0.387500000 0.477777778
## MAT OKI PAL QNC SON SSK
## 0.633333333 0.750000000 0.659090909 0.654545455 0.466666667 0.004166667
## SUF SUU TAI TIR UTS YUN
## 0.400000000 0.491666667 0.550000000 0.437500000 0.670833333 0.605555556
##
## $pop.score$`60`
## ALD ALV BEN BER CAM CHA CRO GEL
## 0.5900000 0.4791667 0.5708333 0.6583333 0.4208333 0.6083333 0.6166667 0.2250000
## GRA GRC HAI HAN HOC HUN INJ INW
## 0.6590909 0.6350000 0.6208333 0.3750000 0.5428571 0.6458333 0.6272727 0.3750000
## JAF KAC KAG KAN KAT KLP KUN LAM
## 0.2750000 0.3583333 0.6916667 0.6409091 0.0750000 0.3750000 0.4250000 0.5333333
## MAT OKI PAL QNC SON SSK SUF SUU
## 0.5666667 0.6625000 0.6000000 0.6045455 0.3000000 0.1416667 0.3000000 0.4666667
## TAI TIR UTS YUN
## 0.5571429 0.3625000 0.6541667 0.4666667
##
##
## $mean
## 1 5 10 15 20 25 30 35
## 0.3744895 0.5321423 0.6234869 0.6627691 0.6571355 0.6595656 0.6565738 0.6259836
## 40 45 50 55 60
## 0.5946897 0.5705289 0.5421440 0.5196531 0.4918561
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
##
## $pred$y
## [1] 0.3744895 0.4181755 0.4601581 0.4987195 0.5321423 0.5592002 0.5806327
## [8] 0.5976708 0.6115452 0.6234869 0.6344505 0.6442864 0.6525687 0.6588716
## [15] 0.6627691 0.6640422 0.6632999 0.6613575 0.6590309 0.6571355 0.6563023
## [22] 0.6564223 0.6572020 0.6583476 0.6595656 0.6605631 0.6610510 0.6607409
## [29] 0.6593446 0.6565738 0.6522519 0.6466496 0.6401492 0.6331331 0.6259836
## [36] 0.6190277 0.6123706 0.6060621 0.6001518 0.5946897 0.5896829 0.5849687
## [43] 0.5803418 0.5755970 0.5705289 0.5650042 0.5591772 0.5532741 0.5475209
## [50] 0.5421440 0.5372960 0.5328367 0.5285523 0.5242290 0.5196531 0.5146617
## [57] 0.5092951 0.5036445 0.4978011 0.4918561
##
##
## $best
## [1] 16
Run DAPC with object
Save it
saveRDS(
dapc_snp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc_snp.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc_snp.rds"
)
)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
Plot using different discriminant functions
1 & 2
# 1 and 2
scatter(
dapc_snp,
bg = "white",
scree.da = TRUE,
cex = 1,
pch = 20,
cex.lab = 0.1,
col = myCol,
xax = 1,
yax = 2
)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_euro_global_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_euro_global_region_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
myCol2 <- c("#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
dev.off()
myCol2 <- c("#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_euro_global_region_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
myCol2 <- c("#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=3)
dev.off()
myCol2 <- c("#a113b2", "#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#a113b2", "#146c45", "magenta", "magenta", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#a113b2", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=3)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_albania_croatia_greece_US.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.92910 Americas BER 2018
## 2 Palm Beach USA 26.70560 -80.03640 Americas PAL 2018
## 3 Dubrovnik Croatia 42.60654 18.22661 Europe CRO 2017
## 4 Vlore Albania 40.46600 19.48970 Europe ALV 2020
## 5 Durres Albania 41.29704 19.50373 Europe ALD 2018
## 6 Tirana Albania 41.31473 19.83172 Europe TIR 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 3 NA 77
## 3 Southern Europe East Europe 31 23 23
## 4 Southern Europe East Europe 32 24 24
## 5 Southern Europe East Europe 33 25 25
## 6 Southern Europe East Europe 34 26 26
## [1] OKI OKI OKI OKI OKI OKI
## 36 Levels: ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ ... YUN
Load the csv
countr <- read.csv(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "DAPC_alb_cro_gre_US.csv"
))
df <- as.data.frame(countr)
head(df)
## pop country
## 1 OKI Japan
## 2 OKI Japan
## 3 OKI Japan
## 4 OKI Japan
## 5 OKI Japan
## 6 OKI Japan
## [1] Japan Japan Japan Japan Japan Japan Japan
## [8] Japan Japan Japan Japan Japan China China
## [15] China China China China China China China
## [22] China China China China China China China
## [29] China China China China China Nepal Nepal
## [36] Nepal Nepal Taiwan Taiwan Taiwan Taiwan Taiwan
## [43] Taiwan Taiwan China China China China China
## [50] China China China China China China China
## [57] Vietnam Vietnam Vietnam Vietnam Malaysia Malaysia Malaysia
## [64] Malaysia Vietnam Vietnam Vietnam Vietnam Vietnam Vietnam
## [71] Vietnam Vietnam Vietnam Vietnam Vietnam Vietnam Vietnam
## [78] Vietnam Vietnam Vietnam Vietnam Vietnam Thailand Thailand
## [85] Thailand Thailand Thailand Thailand Thailand Thailand Thailand
## [92] Thailand Thailand Thailand Thailand Thailand Thailand Thailand
## [99] Thailand Thailand Thailand Thailand Thailand Thailand Japan
## [106] Japan Japan Japan Japan Japan Japan Japan
## [113] Japan Japan Japan Japan Japan Japan Japan
## [120] Japan Japan Japan Japan Japan Japan Japan
## [127] Japan Albania Albania Albania Albania Cambodia Cambodia
## [134] Cambodia Cambodia Cambodia Cambodia Japan Cambodia Cambodia
## [141] Cambodia Cambodia Cambodia Cambodia Japan Japan Greece
## [148] Japan Japan Japan Japan Japan Japan Japan
## [155] Japan Japan India India India India India
## [162] India India India India India India India
## [169] Thailand Thailand Thailand Thailand Thailand Thailand Thailand
## [176] Thailand Thailand USA USA USA USA USA
## [183] USA USA USA USA USA USA USA
## [190] USA USA USA USA USA USA USA
## [197] USA USA USA USA Bhutan Bhutan Nepal
## [204] Nepal Sri Lanka Sri Lanka Thailand Thailand Thailand Thailand
## [211] Thailand Thailand Thailand Thailand Thailand Thailand Thailand
## [218] Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia
## [225] Indonesia Indonesia Indonesia Indonesia Indonesia Maldives Maldives
## [232] Maldives Maldives Croatia Croatia Croatia Croatia Croatia
## [239] Croatia Croatia Croatia Croatia Croatia Croatia Croatia
## [246] Greece Greece Greece Greece Greece Greece Greece
## [253] Greece Greece Greece Greece Greece Greece Greece
## [260] Greece Greece Greece Greece Greece Greece Albania
## [267] Albania Albania Albania Albania Albania Albania Albania
## [274] Albania Albania Albania Albania Albania Albania Albania
## [281] Albania Albania Albania Albania Albania Albania Albania
## [288] Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia
## [295] Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia Indonesia
## [302] Indonesia Malaysia Malaysia Malaysia Malaysia Malaysia Malaysia
## [309] Malaysia Malaysia Malaysia Malaysia Malaysia Malaysia
## 17 Levels: Albania Bhutan Cambodia China Croatia Greece India ... Vietnam
Save the genind object
saveRDS(snp2, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "snp_country.rds"
))
Load the genind object
snp_country <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "snp_country.rds"
))
Scale
## Warning in .local(x, ...): Some scaling values are null.
## Corresponding alleles are removed.
## [1] "matrix" "array"
## [1] 314 45278
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001 0.9454195 -0.9454195 0.8715638 -0.8715638
## 1002 0.0000000 0.0000000 -1.6005080 1.6005080
## 1003 -0.3445529 0.3445529 -1.6005080 1.6005080
## 1004 -0.3445529 0.3445529 -1.6005080 1.6005080
## 1005 -0.3445529 0.3445529 -0.3644721 0.3644721
## AX-583036983_C.T
## 1001 1.30258591
## 1002 0.02858339
## 1003 1.30258591
## 1004 0.02858339
## 1005 1.30258591
grp <- find.clusters(snp_country, max.n.clust=10)
#retained 300
#Choose the number of clusters (>=2): 4
Save it
To load it
##
## 1 2 3 4
## Albania 0 26 0 0
## Bhutan 0 0 2 0
## Cambodia 0 0 12 0
## China 0 22 11 0
## Croatia 0 12 0 0
## Greece 0 21 0 0
## India 0 0 12 0
## Indonesia 0 0 0 27
## Japan 35 12 0 0
## Malaysia 0 0 16 0
## Maldives 0 0 4 0
## Nepal 0 0 4 2
## Sri Lanka 0 0 2 0
## Taiwan 0 7 0 0
## Thailand 0 0 42 0
## USA 23 0 0 0
## Vietnam 0 7 15 0
Save the genind object
saveRDS(dapc_country_1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc_country_1.rds"
))
Load the genind object
dapc_country_1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc_country_1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc_country_1, n.pca=1:ncol(dapc_country_1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## 1 2 3 4
## 0.0000000 0.5879167 0.0000000 0.2729839
##
## $pop.score$`50`
## 1 2 3 4
## 0.7073171 0.4016667 0.6724138 0.3806452
##
## $pop.score$`100`
## 1 2 3 4
## 0.4426829 0.3070833 0.4413793 0.2987903
##
## $pop.score$`150`
## 1 2 3 4
## 0.2292683 0.2066667 0.2224138 0.2016129
##
## $pop.score$`200`
## 1 2 3 4
## 0.1621951 0.1412500 0.1534483 0.1149194
##
## $pop.score$`250`
## 1 2 3 4
## 0.09024390 0.07166667 0.07931034 0.06532258
##
## $pop.score$`300`
## 1 2 3 4
## 0.02073171 0.01250000 0.01206897 0.01129032
##
##
## $mean
## 1 50 100 150 200 250 300
## 0.21522513 0.54051067 0.37248397 0.21499041 0.14295319 0.07663587 0.01414775
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## [217] 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## [235] 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## [253] 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## [271] 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## [289] 289 290 291 292 293 294 295 296 297 298 299 300
##
## $pred$y
## [1] 0.21522513 0.22449083 0.23375055 0.24299769 0.25222565 0.26142782
## [7] 0.27059759 0.27972837 0.28881353 0.29784648 0.30682062 0.31572932
## [13] 0.32456599 0.33332403 0.34199682 0.35057776 0.35906024 0.36743766
## [19] 0.37570341 0.38385088 0.39187348 0.39976458 0.40751760 0.41512591
## [25] 0.42258291 0.42988201 0.43701659 0.44398004 0.45076576 0.45736714
## [31] 0.46377758 0.46999048 0.47599921 0.48179719 0.48737780 0.49273443
## [37] 0.49786049 0.50274936 0.50739444 0.51178912 0.51592679 0.51980086
## [43] 0.52340471 0.52673173 0.52977533 0.53252889 0.53498582 0.53713949
## [49] 0.53898331 0.54051067 0.54171741 0.54260912 0.54319384 0.54347961
## [55] 0.54347446 0.54318644 0.54262358 0.54179393 0.54070551 0.53936637
## [61] 0.53778454 0.53596806 0.53392498 0.53166332 0.52919113 0.52651645
## [67] 0.52364731 0.52059175 0.51735781 0.51395352 0.51038693 0.50666607
## [73] 0.50279899 0.49879371 0.49465828 0.49040073 0.48602911 0.48155145
## [79] 0.47697579 0.47231016 0.46756261 0.46274117 0.45785389 0.45290879
## [85] 0.44791392 0.44287731 0.43780701 0.43271105 0.42759747 0.42247430
## [91] 0.41734959 0.41223138 0.40712769 0.40204658 0.39699607 0.39198421
## [97] 0.38701903 0.38210857 0.37726087 0.37248397 0.36778442 0.36316278
## [103] 0.35861814 0.35414959 0.34975621 0.34543708 0.34119128 0.33701791
## [109] 0.33291603 0.32888474 0.32492313 0.32103026 0.31720523 0.31344712
## [115] 0.30975501 0.30612799 0.30256513 0.29906553 0.29562826 0.29225242
## [121] 0.28893707 0.28568132 0.28248423 0.27934489 0.27626239 0.27323581
## [127] 0.27026423 0.26734674 0.26448242 0.26167035 0.25890962 0.25619930
## [133] 0.25353849 0.25092627 0.24836171 0.24584391 0.24337194 0.24094490
## [139] 0.23856185 0.23622190 0.23392411 0.23166758 0.22945138 0.22727460
## [145] 0.22513632 0.22303564 0.22097162 0.21894335 0.21694992 0.21499041
## [151] 0.21306393 0.21116968 0.20930688 0.20747475 0.20567253 0.20389944
## [157] 0.20215470 0.20043754 0.19874718 0.19708284 0.19544376 0.19382916
## [163] 0.19223825 0.19067028 0.18912445 0.18760000 0.18609616 0.18461213
## [169] 0.18314716 0.18170047 0.18027127 0.17885880 0.17746229 0.17608094
## [175] 0.17471400 0.17336068 0.17202021 0.17069182 0.16937472 0.16806815
## [181] 0.16677133 0.16548349 0.16420384 0.16293162 0.16166604 0.16040634
## [187] 0.15915174 0.15790146 0.15665473 0.15541077 0.15416881 0.15292808
## [193] 0.15168779 0.15044717 0.14920545 0.14796185 0.14671560 0.14546593
## [199] 0.14421205 0.14295319 0.14168874 0.14041872 0.13914333 0.13786276
## [205] 0.13657720 0.13528682 0.13399184 0.13269243 0.13138879 0.13008110
## [211] 0.12876955 0.12745434 0.12613566 0.12481369 0.12348862 0.12216065
## [217] 0.12082996 0.11949674 0.11816119 0.11682350 0.11548384 0.11414242
## [223] 0.11279942 0.11145503 0.11010945 0.10876286 0.10741545 0.10606741
## [229] 0.10471893 0.10337021 0.10202142 0.10067277 0.09932444 0.09797662
## [235] 0.09662950 0.09528327 0.09393813 0.09259425 0.09125183 0.08991106
## [241] 0.08857213 0.08723523 0.08590055 0.08456828 0.08323861 0.08191172
## [247] 0.08058782 0.07926708 0.07794970 0.07663587 0.07532574 0.07401926
## [253] 0.07271638 0.07141701 0.07012107 0.06882850 0.06753922 0.06625315
## [259] 0.06497022 0.06369035 0.06241347 0.06113951 0.05986838 0.05860002
## [265] 0.05733435 0.05607129 0.05481077 0.05355271 0.05229705 0.05104370
## [271] 0.04979258 0.04854364 0.04729678 0.04605193 0.04480903 0.04356799
## [277] 0.04232874 0.04109121 0.03985532 0.03862099 0.03738815 0.03615673
## [283] 0.03492665 0.03369783 0.03247020 0.03124369 0.03001823 0.02879372
## [289] 0.02757011 0.02634731 0.02512526 0.02390387 0.02268307 0.02146279
## [295] 0.02024295 0.01902348 0.01780429 0.01658533 0.01536651 0.01414775
##
##
## $best
## [1] 54
Run DAPC with object
Save it
saveRDS(
dapc_snp_country, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "albania_croatia_greece_US", "dapc_snp_country.rds"
)
)
To load it
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_snp_country_albania_croatia_greece_US_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
Plot with new colors - by region
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/albania_croatia_greece_US/dapc_snp_country_albania_croatia_greece_US_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "#146c45", "#2524f9", "#c41A1C", "#a113b2", "#a113b2", "#146c45", "#2524f9", "#c41A1C", "#2524f9", "#146c45", "#146c45", "#146c45", "#c41A1C", "#2524f9", "#66C2A5", "#2524f9")
op <- par(cex = 0.39)
scatter(dapc_snp_country, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)
Create files
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_far_east_euro.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_native_far_east_euro \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_native_far_east_euro.log
22642 variants loaded from .bim file. –keep-fam: 338 people remaining. Total genotyping rate in remaining samples is 0.970255. 22642 variants and 338 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_native_far_east_euro \
--recodeA \
--out output/dapc/MAF_1/dapc_native_far_east_euro \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_native_far_east_euro.log
22642 variants loaded from .bim file. 22642 variants and 338 people pass filters and QC.
Clean env & memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 5970497 318.9 16401712 876.0 24799007 1324.5
## Vcells 10172350 77.7 434414951 3314.4 848466578 6473.3
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/MAF_1/dapc_native_far_east_euro.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## [1] 338
## [1] 22642
## [1] 37
## [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
## [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
## [21] "1061" "1062" "1063" "1064" "1065" "1066" "1067" "1068" "1069" "1070"
## [31] "1071" "1072" "1073" "1074" "1075" "1076" "1077" "1078" "1079" "1080"
## [41] "1081" "1082" "1083" "1084" "1085" "1086" "1087" "1088" "1089" "1090"
## [51] "1091" "1092" "1093" "1094" "1095" "1101" "1102" "1103" "1105" "1106"
## [61] "1107" "1109" "1110" "1111" "1112" "1113" "1114" "1115" "1116" "1117"
## [71] "1118" "1119" "1120" "1121" "1122" "1123" "1124" "1125" "1126" "1127"
## [81] "1128" "1129" "1130" "1131" "1132" "1133" "1134" "1135" "1136" "1137"
## [91] "1138" "1139" "1140" "1141" "1142" "1143" "1144" "1145" "1146" "1147"
## [101] "1148" "1149" "1150" "1151" "1152" "1153" "1154" "1155" "1156" "1157"
## [111] "1158" "1159" "1160" "1161" "1162" "1163" "1165" "1166" "1167" "1168"
## [121] "1169" "1170" "1171" "1172" "1173" "1174" "1175" "1176" "1177" "1178"
## [131] "1179" "1180" "1181" "1182" "1183" "1184" "1202" "1203" "1204" "1205"
## [141] "1206" "1207" "1208" "1209" "1210" "1211" "1212" "1213" "1214" "1215"
## [151] "1216" "1217" "1238" "1239" "1240" "1241" "1242" "1243" "1244" "1245"
## [161] "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254" "1255" "1256"
## [171] "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264" "1265" "1266"
## [181] "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276" "1282" "1283"
## [191] "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331" "1332" "1333"
## [201] "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377" "1378" "1379"
## [211] "1380" "1381" "1382" "1383" "1384" "159" "162" "167" "169" "170"
## [221] "171" "172" "173" "2174" "2175" "2176" "2177" "2178" "2179" "217"
## [231] "2180" "2181" "2182" "2183" "2184" "2185" "218" "219" "220" "221"
## [241] "222" "223" "224" "225" "226" "227" "230" "255" "256" "257"
## [251] "258" "261" "262" "263" "264" "265" "266" "267" "268" "269"
## [261] "270" "271" "272" "273" "275" "276" "277" "278" "602" "603"
## [271] "604" "607" "609" "610" "623" "624" "625" "626" "627" "628"
## [281] "629" "630" "631" "632" "633" "666" "669" "670" "671" "672"
## [291] "673" "674" "675" "676" "677" "678" "679" "680" "681" "682"
## [301] "683" "901" "902" "903" "904" "905" "906" "907" "908" "909"
## [311] "910" "964" "965" "966" "967" "972" "973" "975" "976" "977"
## [321] "978" "979" "980" "981" "982" "983" "984" "985" "986" "987"
## [331] "988" "989" "990" "991" "992" "993" "994" "995"
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save it
To load it
## Warning in .local(x, ...): Some scaling values are null.
## Corresponding alleles are removed.
## [1] "matrix" "array"
Save it
To load it
## [1] 338 45284
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001 1.0174555 -1.0174555 0.8124603 -0.8124603
## 1002 0.0000000 0.0000000 -1.8770635 1.8770635
## 1003 -0.2486263 0.2486263 -1.8770635 1.8770635
## 1004 -0.2486263 0.2486263 -1.8770635 1.8770635
## 1005 -0.2486263 0.2486263 -0.5323016 0.5323016
## AX-583036983_C.T
## 1001 1.4460470
## 1002 0.1495911
## 1003 1.4460470
## 1004 0.1495911
## 1005 1.4460470
Save it
To load it
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp3, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 40, xval.plot = TRUE)
$n.pca: 60 first PCs of PCA used $n.da: 36 discriminant functions saved $var (proportion of conserved variance): 0.426
Run dapc using these #s from CV
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "far_east_euro", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "far_east_euro", "dapc1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## ALU ARM BEN CAM CHA GEL
## 0.54166667 -0.03000000 0.92083333 0.35833333 0.18750000 0.00000000
## GES HAI HAN HOC HUN INJ
## 0.57083333 0.47916667 0.75000000 0.14285714 0.56666667 0.90909091
## INW JAF KAC KAG KAN KAT
## 1.00000000 0.00000000 0.00000000 0.66250000 0.00000000 0.00000000
## KER KLP KRA KUN LAM MAT
## 0.05833333 0.00000000 0.33750000 0.00000000 0.00000000 0.47916667
## OKI QNC RAR SEV SOC SON
## 0.58333333 -0.03636364 0.21666667 0.04166667 0.04166667 0.00000000
## SSK SUF SUU TAI TIK UTS
## 0.31666667 0.00000000 0.83333333 0.00000000 0.07916667 0.43333333
## YUN
## 0.00000000
##
## $pop.score$`5`
## ALU ARM BEN CAM CHA GEL
## 0.91666667 0.92500000 0.89166667 0.56666667 0.41666667 -0.10000000
## GES HAI HAN HOC HUN INJ
## 0.40416667 0.37083333 0.97500000 0.37142857 0.83750000 0.88636364
## INW JAF KAC KAG KAN KAT
## 0.95000000 -0.07500000 -0.02500000 0.90416667 0.58636364 -0.06666667
## KER KLP KRA KUN LAM MAT
## 0.36250000 -0.06250000 0.07083333 0.93750000 -0.03333333 0.62500000
## OKI QNC RAR SEV SOC SON
## 0.92083333 0.90000000 0.03750000 0.68750000 -0.01666667 -0.10000000
## SSK SUF SUU TAI TIK UTS
## 0.70833333 0.29166667 0.97500000 0.92142857 0.25416667 0.86666667
## YUN
## -0.05000000
##
## $pop.score$`10`
## ALU ARM BEN CAM CHA GEL
## 0.75000000 0.85500000 0.88333333 0.65833333 0.65416667 -0.27500000
## GES HAI HAN HOC HUN INJ
## 0.53333333 0.45416667 0.80000000 0.64285714 0.94166667 0.81363636
## INW JAF KAC KAG KAN KAT
## 0.82500000 -0.22500000 -0.08333333 0.87916667 0.61818182 -0.13333333
## KER KLP KRA KUN LAM MAT
## 0.41666667 0.15000000 -0.03750000 0.81250000 -0.10000000 0.76250000
## OKI QNC RAR SEV SOC SON
## 0.86250000 0.90000000 0.30416667 0.71666667 0.38750000 -0.20000000
## SSK SUF SUU TAI TIK UTS
## 0.53333333 0.21666667 0.94166667 0.90000000 0.70833333 0.91666667
## YUN
## -0.05000000
##
## $pop.score$`15`
## ALU ARM BEN CAM CHA GEL GES
## 0.8125000 0.9050000 0.8375000 0.6333333 0.6458333 0.5750000 0.5375000
## HAI HAN HOC HUN INJ INW JAF
## 0.4500000 0.7750000 0.6000000 0.8916667 0.9090909 0.7875000 -0.4000000
## KAC KAG KAN KAT KER KLP KRA
## -0.2083333 0.9000000 0.9136364 -0.1416667 0.7125000 0.7500000 0.2958333
## KUN LAM MAT OKI QNC RAR SEV
## 0.7625000 0.3388889 0.7166667 0.8416667 0.8727273 0.2708333 0.8666667
## SOC SON SSK SUF SUU TAI TIK
## 0.5083333 -0.3000000 0.4250000 0.1583333 0.8583333 0.8785714 0.5291667
## UTS YUN
## 0.8333333 0.1000000
##
## $pop.score$`20`
## ALU ARM BEN CAM CHA GEL
## 0.73333333 0.83500000 0.84166667 0.55833333 0.64583333 0.52500000
## GES HAI HAN HOC HUN INJ
## 0.43333333 0.50416667 0.68750000 0.70714286 0.85416667 0.81363636
## INW JAF KAC KAG KAN KAT
## 0.66250000 -0.45000000 -0.09166667 0.87500000 0.88181818 0.43333333
## KER KLP KRA KUN LAM MAT
## 0.69583333 0.68750000 0.22500000 0.75000000 0.40000000 0.67083333
## OKI QNC RAR SEV SOC SON
## 0.86666667 0.82727273 0.69166667 0.88333333 0.44166667 -0.05000000
## SSK SUF SUU TAI TIK UTS
## 0.33750000 0.61666667 0.72500000 0.77142857 0.70416667 0.82916667
## YUN
## 0.15000000
##
## $pop.score$`25`
## ALU ARM BEN CAM CHA GEL
## 0.87083333 0.77500000 0.81666667 0.56666667 0.45416667 0.35000000
## GES HAI HAN HOC HUN INJ
## 0.37500000 0.59583333 0.61250000 0.74285714 0.80416667 0.85909091
## INW JAF KAC KAG KAN KAT
## 0.55000000 -0.57500000 0.07500000 0.76666667 0.82727273 0.40833333
## KER KLP KRA KUN LAM MAT
## 0.76250000 0.55000000 0.27916667 0.51250000 0.57777778 0.67083333
## OKI QNC RAR SEV SOC SON
## 0.80000000 0.87727273 0.74166667 0.84583333 0.53333333 -0.06666667
## SSK SUF SUU TAI TIK UTS
## 0.40416667 0.55833333 0.76666667 0.80714286 0.72500000 0.79583333
## YUN
## 0.11111111
##
## $pop.score$`30`
## ALU ARM BEN CAM CHA GEL
## 0.82500000 0.81500000 0.78750000 0.56666667 0.56250000 0.40000000
## GES HAI HAN HOC HUN INJ
## 0.52916667 0.66666667 0.55000000 0.75000000 0.77916667 0.80454545
## INW JAF KAC KAG KAN KAT
## 0.55000000 -0.12500000 0.12500000 0.76666667 0.78181818 0.28333333
## KER KLP KRA KUN LAM MAT
## 0.69166667 0.22500000 0.40000000 0.62500000 0.59444444 0.70833333
## OKI QNC RAR SEV SOC SON
## 0.74166667 0.82272727 0.72916667 0.82916667 0.71250000 0.15000000
## SSK SUF SUU TAI TIK UTS
## 0.41666667 0.50000000 0.69166667 0.66428571 0.79583333 0.80000000
## YUN
## 0.08333333
##
## $pop.score$`35`
## ALU ARM BEN CAM CHA GEL GES HAI
## 0.8208333 0.6900000 0.6958333 0.4875000 0.4666667 0.3250000 0.5833333 0.6625000
## HAN HOC HUN INJ INW JAF KAC KAG
## 0.5500000 0.6857143 0.7708333 0.7863636 0.5375000 0.3500000 0.1000000 0.7750000
## KAN KAT KER KLP KRA KUN LAM MAT
## 0.6863636 0.2416667 0.7500000 0.5625000 0.3958333 0.5875000 0.6833333 0.6041667
## OKI QNC RAR SEV SOC SON SSK SUF
## 0.7791667 0.7318182 0.6916667 0.7583333 0.6708333 0.1666667 0.2333333 0.4750000
## SUU TAI TIK UTS YUN
## 0.6166667 0.6714286 0.7375000 0.7416667 0.1944444
##
## $pop.score$`40`
## ALU ARM BEN CAM CHA GEL GES HAI
## 0.7500000 0.7150000 0.6583333 0.4958333 0.6625000 0.3750000 0.6666667 0.6500000
## HAN HOC HUN INJ INW JAF KAC KAG
## 0.4000000 0.6071429 0.7000000 0.7909091 0.4000000 0.4750000 0.0250000 0.7291667
## KAN KAT KER KLP KRA KUN LAM MAT
## 0.6863636 0.1833333 0.7125000 0.5125000 0.4458333 0.4750000 0.5888889 0.5583333
## OKI QNC RAR SEV SOC SON SSK SUF
## 0.7125000 0.7227273 0.7291667 0.7625000 0.6083333 0.1000000 0.2416667 0.3916667
## SUU TAI TIK UTS YUN
## 0.6333333 0.5928571 0.7416667 0.6916667 0.4333333
##
## $pop.score$`45`
## ALU ARM BEN CAM CHA GEL
## 0.63750000 0.65500000 0.49166667 0.43750000 0.62500000 0.27500000
## GES HAI HAN HOC HUN INJ
## 0.52916667 0.61666667 0.48750000 0.57142857 0.67916667 0.68181818
## INW JAF KAC KAG KAN KAT
## 0.43750000 0.32500000 0.15000000 0.74583333 0.65909091 0.25833333
## KER KLP KRA KUN LAM MAT
## 0.61250000 0.46250000 0.46250000 0.38750000 0.60555556 0.51666667
## OKI QNC RAR SEV SOC SON
## 0.68750000 0.67272727 0.65000000 0.70833333 0.73750000 -0.01666667
## SSK SUF SUU TAI TIK UTS
## -0.05416667 0.37500000 0.55833333 0.60714286 0.76250000 0.68750000
## YUN
## 0.41111111
##
## $pop.score$`50`
## ALU ARM BEN CAM CHA
## 6.750000e-01 6.250000e-01 5.375000e-01 4.458333e-01 5.875000e-01
## GEL GES HAI HAN HOC
## 3.750000e-01 4.958333e-01 6.250000e-01 3.875000e-01 5.785714e-01
## HUN INJ INW JAF KAC
## 6.333333e-01 6.727273e-01 4.000000e-01 3.000000e-01 3.500000e-01
## KAG KAN KAT KER KLP
## 6.875000e-01 6.454545e-01 1.500000e-01 6.666667e-01 3.750000e-01
## KRA KUN LAM MAT OKI
## 4.083333e-01 4.875000e-01 5.111111e-01 5.708333e-01 6.833333e-01
## QNC RAR SEV SOC SON
## 5.863636e-01 7.416667e-01 6.833333e-01 6.375000e-01 3.333333e-01
## SSK SUF SUU TAI TIK
## -1.387779e-17 4.250000e-01 4.000000e-01 5.000000e-01 6.833333e-01
## UTS YUN
## 6.625000e-01 3.722222e-01
##
## $pop.score$`55`
## ALU ARM BEN CAM CHA GEL
## 0.64583333 0.60000000 0.54166667 0.37500000 0.36666667 0.27500000
## GES HAI HAN HOC HUN INJ
## 0.47083333 0.56666667 0.33750000 0.47857143 0.65833333 0.68181818
## INW JAF KAC KAG KAN KAT
## 0.37500000 0.20000000 -0.05833333 0.60833333 0.64090909 0.06666667
## KER KLP KRA KUN LAM MAT
## 0.62916667 0.33750000 0.51666667 0.41250000 0.42222222 0.54583333
## OKI QNC RAR SEV SOC SON
## 0.64166667 0.63181818 0.60416667 0.70000000 0.66666667 0.30000000
## SSK SUF SUU TAI TIK UTS
## -0.07500000 0.34166667 0.45833333 0.52142857 0.57916667 0.65833333
## YUN
## 0.30555556
##
## $pop.score$`60`
## ALU ARM BEN CAM CHA GEL
## 0.62500000 0.55000000 0.65416667 0.38750000 0.52916667 0.22500000
## GES HAI HAN HOC HUN INJ
## 0.43333333 0.65833333 0.35000000 0.50714286 0.63750000 0.57272727
## INW JAF KAC KAG KAN KAT
## 0.40000000 0.25000000 -0.06666667 0.65416667 0.55000000 0.09166667
## KER KLP KRA KUN LAM MAT
## 0.61250000 0.37500000 0.46666667 0.38750000 0.41111111 0.52083333
## OKI QNC RAR SEV SOC SON
## 0.66250000 0.51363636 0.70000000 0.60000000 0.57916667 0.28333333
## SSK SUF SUU TAI TIK UTS
## -0.02916667 0.25833333 0.40833333 0.46428571 0.56666667 0.60000000
## YUN
## 0.44444444
##
##
## $mean
## 1 5 10 15 20 25 30 35
## 0.2822680 0.4873581 0.5063201 0.5633220 0.5857783 0.5710142 0.5837699 0.5747829
## 40 45 50 55 60
## 0.5574249 0.5161408 0.5108050 0.4602204 0.4549779
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
## [51] 51 52 53 54 55 56 57 58 59 60
##
## $pred$y
## [1] 0.3255465 0.3531379 0.3801034 0.4058130 0.4296369 0.4510852 0.4702284
## [8] 0.4872768 0.5024409 0.5159312 0.5279346 0.5385445 0.5478306 0.5558629
## [15] 0.5627113 0.5684469 0.5731475 0.5768919 0.5797591 0.5818282 0.5831878
## [22] 0.5839646 0.5842953 0.5843164 0.5841643 0.5839437 0.5836316 0.5831735
## [29] 0.5825146 0.5816002 0.5803809 0.5788284 0.5769197 0.5746318 0.5719418
## [36] 0.5688334 0.5653180 0.5614137 0.5571385 0.5525106 0.5475600 0.5423646
## [43] 0.5370139 0.5315979 0.5262061 0.5209039 0.5156588 0.5104140 0.5051126
## [50] 0.4996978 0.4941396 0.4885161 0.4829323 0.4774930 0.4723033 0.4674387
## [57] 0.4628576 0.4584889 0.4542615 0.4501044
##
##
## $best
## [1] 24
Run DAPC with object
Save it
saveRDS(
dapc_snp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "far_east_euro", "dapc_snp.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "far_east_euro", "dapc_snp.rds"
)
)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
PCs 1 & 2
# 1 and 2
scatter(
dapc_snp,
bg = "white",
scree.da = TRUE,
cex = 1,
pch = 20,
cex.lab = 0.1,
col = myCol,
xax = 1,
yax = 2
)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/far_east_euro/dapc_far_east_euro_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/far_east_euro/dapc_far_east_euro_region_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
myCol2 <- c("goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45", "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
dev.off()
myCol2 <- c("goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45", "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=2)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/far_east_euro/dapc_far_east_euro_region_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
myCol2 <- c("goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45", "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=3)
dev.off()
myCol2 <- c("goldenrod", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#146c45", "goldenrod", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "goldenrod", "#2524f9", "goldenrod", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "goldenrod", "goldenrod", "goldenrod", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "#c41A1C", "#c41A1C")
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0,cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", cex.leg=1.0, xax =1, yax=3)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "cex.leg" is not a
## graphical parameter
Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)
Create files
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_sicily_and_Americas.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_sicily_and_Americas \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_sicily_and_Americas.log
22642 variants loaded from .bim file. –keep-fam: 287 people remaining. Total genotyping rate in remaining samples is 0.967131. 22642 variants and 287 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_sicily_and_Americas \
--recodeA \
--out output/dapc/MAF_1/dapc_sicily_and_Americas \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_sicily_and_Americas.log
22642 variants loaded from .bim file. 22642 variants and 287 people pass filters and QC.
Clean env & memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 5970864 318.9 16401712 876.0 24799007 1324.5
## Vcells 10179893 77.7 347531961 2651.5 848466578 6473.3
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/MAF_1/dapc_sicily_and_Americas.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## [1] 287
## [1] 22642
## [1] 33
## [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
## [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
## [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
## [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
## [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
## [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
## [61] "1214" "1215" "1216" "1217" "1226" "1227" "1228" "1229" "1230" "1232"
## [71] "1233" "1234" "1237" "1238" "1239" "1240" "1241" "1242" "1243" "1244"
## [81] "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254" "1255"
## [91] "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264" "1265"
## [101] "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276" "1282"
## [111] "1283" "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331" "1332"
## [121] "1333" "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377" "1378"
## [131] "1379" "1380" "1381" "1382" "1383" "1384" "175" "176" "177" "178"
## [141] "180" "181" "182" "183" "184" "185" "186" "2174" "2175" "2176"
## [151] "2177" "2178" "2179" "217" "2180" "2181" "2182" "2183" "2184" "2185"
## [161] "218" "219" "220" "221" "222" "223" "224" "225" "226" "227"
## [171] "230" "255" "256" "257" "258" "261" "262" "263" "264" "265"
## [181] "266" "267" "268" "269" "270" "271" "272" "273" "275" "276"
## [191] "277" "278" "294" "295" "296" "297" "298" "299" "301" "302"
## [201] "303" "304" "305" "435" "436" "437" "438" "439" "440" "441"
## [211] "442" "443" "444" "445" "446" "602" "603" "604" "607" "609"
## [221] "610" "623" "624" "625" "626" "627" "628" "629" "630" "631"
## [231] "632" "633" "666" "669" "670" "671" "672" "673" "674" "675"
## [241] "676" "677" "678" "679" "680" "681" "682" "683" "684" "685"
## [251] "686" "687" "688" "689" "690" "691" "692" "693" "694" "695"
## [261] "964" "965" "966" "967" "972" "973" "975" "976" "977" "978"
## [271] "979" "980" "981" "982" "983" "984" "985" "986" "987" "988"
## [281] "989" "990" "991" "992" "993" "994" "995"
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save it
To load it
## Warning in .local(x, ...): Some scaling values are null.
## Corresponding alleles are removed.
## [1] "matrix" "array"
Save it
To load it
## [1] 287 45274
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001 0.9547918 -0.9547918 0.8231945 -0.8231945
## 1002 0.0000000 0.0000000 -1.5830663 1.5830663
## 1003 -0.3289235 0.3289235 -1.5830663 1.5830663
## 1004 -0.3289235 0.3289235 -1.5830663 1.5830663
## 1005 -0.3289235 0.3289235 -0.3799359 0.3799359
## AX-583036983_C.T
## 1001 1.29864538
## 1002 0.04387315
## 1003 1.29864538
## 1004 0.04387315
## 1005 1.29864538
Save it
To load it
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp3, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 40, xval.plot = TRUE)
$n.pca: 40 first PCs of PCA used $n.da: 32 discriminant functions saved $var (proportion of conserved variance): 0.354
Run dapc using these #s from CV
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "sicily_americas", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "sicily_americas", "dapc1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## BEN BER CAM CHA GEL GRV
## 0.95000000 0.80833333 0.39583333 0.10000000 0.00000000 0.44166667
## HAI HAN HOC HUN INJ INW
## 0.22083333 0.75000000 0.14285714 0.54583333 0.89090909 1.00000000
## JAF KAC KAG KAN KAT KLP
## 0.00000000 0.00000000 0.75416667 0.53636364 0.00000000 0.00000000
## KUN LAM MAT OKI PAL QNC
## 0.00000000 0.00000000 0.42083333 0.65833333 0.52272727 -0.08181818
## REC SIC SON SSK SUF SUU
## 0.90454545 0.77777778 0.00000000 0.55833333 0.00000000 0.83333333
## TAI UTS YUN
## 0.00000000 0.57083333 0.00000000
##
## $pop.score$`5`
## BEN BER CAM CHA GEL GRV
## 0.87500000 0.87916667 0.47500000 0.39583333 -0.10000000 0.91666667
## HAI HAN HOC HUN INJ INW
## 0.52500000 0.91250000 0.65714286 0.92500000 0.82727273 0.91250000
## JAF KAC KAG KAN KAT KLP
## -0.10000000 0.00000000 0.89583333 0.62272727 -0.07500000 -0.01250000
## KUN LAM MAT OKI PAL QNC
## 1.00000000 -0.03888889 0.59166667 0.82916667 0.94090909 0.93181818
## REC SIC SON SSK SUF SUU
## 0.92272727 0.91666667 -0.03333333 0.38750000 0.29166667 1.00000000
## TAI UTS YUN
## 0.97142857 0.92083333 -0.01666667
##
## $pop.score$`10`
## BEN BER CAM CHA GEL GRV
## 0.88333333 0.87916667 0.62500000 0.60416667 -0.27500000 0.87916667
## HAI HAN HOC HUN INJ INW
## 0.72916667 0.87500000 0.93571429 0.85833333 0.78181818 0.88750000
## JAF KAC KAG KAN KAT KLP
## -0.35000000 -0.10000000 0.83750000 0.70454545 -0.10833333 0.30000000
## KUN LAM MAT OKI PAL QNC
## 0.88750000 0.27777778 0.71250000 0.86666667 0.87272727 0.85909091
## REC SIC SON SSK SUF SUU
## 0.91818182 0.96111111 -0.13333333 0.30833333 0.28333333 0.88333333
## TAI UTS YUN
## 0.97142857 0.90416667 -0.05555556
##
## $pop.score$`15`
## BEN BER CAM CHA GEL GRV
## 0.88750000 0.85833333 0.62916667 0.52083333 0.67500000 0.86250000
## HAI HAN HOC HUN INJ INW
## 0.59166667 0.82500000 0.92857143 0.87500000 0.93636364 0.81250000
## JAF KAC KAG KAN KAT KLP
## -0.47500000 0.03333333 0.89583333 0.91363636 0.51666667 0.83750000
## KUN LAM MAT OKI PAL QNC
## 0.86250000 0.36666667 0.79583333 0.89166667 0.89545455 0.88636364
## REC SIC SON SSK SUF SUU
## 0.82272727 0.93888889 0.11666667 0.22500000 0.66666667 0.88333333
## TAI UTS YUN
## 0.82857143 0.84166667 0.10555556
##
## $pop.score$`20`
## BEN BER CAM CHA GEL GRV
## 0.87083333 0.81666667 0.57500000 0.47916667 0.47500000 0.92083333
## HAI HAN HOC HUN INJ INW
## 0.57083333 0.73750000 0.87142857 0.85416667 0.89545455 0.68750000
## JAF KAC KAG KAN KAT KLP
## -0.47500000 0.13333333 0.88750000 0.86363636 0.54166667 0.76250000
## KUN LAM MAT OKI PAL QNC
## 0.81250000 0.77777778 0.74166667 0.85833333 0.85000000 0.84545455
## REC SIC SON SSK SUF SUU
## 0.87272727 0.83333333 0.40000000 0.45416667 0.66666667 0.84166667
## TAI UTS YUN
## 0.83571429 0.85000000 0.07222222
##
## $pop.score$`25`
## BEN BER CAM CHA GEL GRV HAI HAN
## 0.8708333 0.8750000 0.5583333 0.6375000 0.4250000 0.8583333 0.6250000 0.7250000
## HOC HUN INJ INW JAF KAC KAG KAN
## 0.7428571 0.8583333 0.7954545 0.7000000 0.5250000 0.2500000 0.7708333 0.8409091
## KAT KLP KUN LAM MAT OKI PAL QNC
## 0.5166667 0.6500000 0.7000000 0.7166667 0.7208333 0.8541667 0.8409091 0.8636364
## REC SIC SON SSK SUF SUU TAI UTS
## 0.8090909 0.8388889 0.2833333 0.4416667 0.5750000 0.8333333 0.7714286 0.8541667
## YUN
## 0.2666667
##
## $pop.score$`30`
## BEN BER CAM CHA GEL GRV HAI HAN
## 0.7458333 0.8250000 0.4666667 0.6083333 0.5000000 0.8083333 0.7416667 0.6500000
## HOC HUN INJ INW JAF KAC KAG KAN
## 0.8214286 0.8125000 0.8090909 0.6250000 0.4250000 0.3833333 0.8375000 0.8090909
## KAT KLP KUN LAM MAT OKI PAL QNC
## 0.4000000 0.6375000 0.6000000 0.6611111 0.6166667 0.8000000 0.8590909 0.8318182
## REC SIC SON SSK SUF SUU TAI UTS
## 0.8136364 0.7833333 0.2166667 0.4125000 0.5583333 0.7500000 0.7500000 0.8333333
## YUN
## 0.4277778
##
## $pop.score$`35`
## BEN BER CAM CHA GEL GRV HAI HAN
## 0.5666667 0.8375000 0.5666667 0.8291667 0.4250000 0.8250000 0.7041667 0.6000000
## HOC HUN INJ INW JAF KAC KAG KAN
## 0.7642857 0.7791667 0.7863636 0.5375000 0.4250000 0.3750000 0.7875000 0.7909091
## KAT KLP KUN LAM MAT OKI PAL QNC
## 0.3916667 0.6125000 0.6125000 0.7388889 0.5291667 0.8125000 0.8181818 0.7863636
## REC SIC SON SSK SUF SUU TAI UTS
## 0.8090909 0.7500000 0.1666667 0.3208333 0.4916667 0.7250000 0.7428571 0.8666667
## YUN
## 0.6833333
##
## $pop.score$`40`
## BEN BER CAM CHA GEL GRV HAI HAN
## 0.6125000 0.8250000 0.5416667 0.7125000 0.3500000 0.7666667 0.7750000 0.5250000
## HOC HUN INJ INW JAF KAC KAG KAN
## 0.7000000 0.7500000 0.7590909 0.5375000 0.2500000 0.3333333 0.7708333 0.7590909
## KAT KLP KUN LAM MAT OKI PAL QNC
## 0.3416667 0.5250000 0.5250000 0.6944444 0.5333333 0.7583333 0.7909091 0.8181818
## REC SIC SON SSK SUF SUU TAI UTS
## 0.7318182 0.6666667 0.5166667 0.1375000 0.4250000 0.6333333 0.7142857 0.7833333
## YUN
## 0.7444444
##
##
## $mean
## 1 5 10 15 20 25 30 35
## 0.3848999 0.5802314 0.5898285 0.6743020 0.6721288 0.6846922 0.6612286 0.6502356
## 40
## 0.6153969
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
##
## $pred$y
## [1] 0.4343442 0.4570347 0.4793634 0.5009661 0.5214787 0.5406094 0.5583547
## [8] 0.5747835 0.5899647 0.6039670 0.6168420 0.6285712 0.6391189 0.6484494
## [15] 0.6565269 0.6633374 0.6689545 0.6734733 0.6769891 0.6795972 0.6813837
## [22] 0.6823979 0.6826799 0.6822698 0.6812077 0.6795381 0.6773224 0.6746267
## [29] 0.6715169 0.6680587 0.6643099 0.6602942 0.6560273 0.6515247 0.6468021
## [36] 0.6418791 0.6367926 0.6315834 0.6262927 0.6209612
##
##
## $best
## [1] 23
Run DAPC with object
Save it
saveRDS(
dapc_snp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "sicily_americas", "dapc_snp.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "sicily_americas", "dapc_snp.rds"
)
)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
PCs 1 & 2
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sicily_americas/dapc_snp_sicily_Americas_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 2 - colored by region
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sicily_americas/dapc_snp_sicily_Americas_region_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:24,35:38,40,1:25)
myCol2 <- c("#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45","#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:24,35:38,40,1:25)
myCol2 <- c("#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45","#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 3
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sicily_americas/dapc_snp_sicily_Americas_region_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:24,35:38,40,1:25)
myCol2 <- c("#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45","#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:24,35:38,40,1:25)
myCol2 <- c("#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45","#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
PCs 1 & 4
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sicily_americas/dapc_snp_sicily_Americas_region_PC1_4.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:24,35:38,40,1:25)
myCol2 <- c("#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45","#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:24,35:38,40,1:25)
myCol2 <- c("#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#f365e7", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45","#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#2524f9", "#f365e7", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
Using SNP Set 3 - r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)
Create files
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/native_turkey_iberia_US.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_native_turkey_iberia_US \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_native_turkey_iberia_US.log
22642 variants loaded from .bim file. –keep-fam: 329 people remaining. Total genotyping rate in remaining samples is 0.969285. 22642 variants and 329 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_native_turkey_iberia_US \
--recodeA \
--out output/dapc/MAF_1/dapc_native_turkey_iberia_US \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_native_turkey_iberia_US.log
22642 variants loaded from .bim file. 22642 variants and 287 people pass filters and QC.
Clean env & memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 5971163 318.9 16401712 876.0 24799007 1324.5
## Vcells 10186560 77.8 278025569 2121.2 848466578 6473.3
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/MAF_1/dapc_native_turkey_iberia_US.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## [1] 329
## [1] 22642
## [1] 39
## [1] "1001" "1002" "1003" "1004" "1005" "1006" "1007" "1008" "1009" "1010"
## [11] "1011" "1012" "1053" "1054" "1055" "1056" "1057" "1058" "1059" "1060"
## [21] "1061" "1062" "1063" "1064" "1089" "1090" "1091" "1092" "1093" "1094"
## [31] "1095" "1101" "1102" "1103" "1105" "1106" "1107" "1161" "1162" "1163"
## [41] "1165" "1166" "1167" "1168" "1169" "1170" "1171" "1172" "1173" "1174"
## [51] "1175" "1176" "1177" "1178" "1179" "1180" "1181" "1182" "1183" "1184"
## [61] "1214" "1215" "1216" "1217" "1238" "1239" "1240" "1241" "1242" "1243"
## [71] "1244" "1245" "1246" "1247" "1249" "1250" "1251" "1252" "1253" "1254"
## [81] "1255" "1256" "1257" "1258" "1259" "1260" "1261" "1262" "1263" "1264"
## [91] "1265" "1266" "1267" "1268" "1270" "1271" "1272" "1273" "1274" "1276"
## [101] "1282" "1283" "1285" "1286" "1325" "1326" "1328" "1329" "1330" "1331"
## [111] "1332" "1333" "1334" "1335" "1336" "1373" "1374" "1375" "1376" "1377"
## [121] "1378" "1379" "1380" "1381" "1382" "1383" "1384" "2174" "2175" "2176"
## [131] "2177" "2178" "2179" "217" "2180" "2181" "2182" "2183" "2184" "2185"
## [141] "218" "219" "220" "221" "222" "223" "224" "225" "226" "227"
## [151] "230" "255" "256" "257" "258" "261" "262" "263" "264" "265"
## [161] "266" "267" "268" "269" "270" "271" "272" "273" "275" "276"
## [171] "277" "278" "279" "280" "281" "282" "283" "284" "285" "286"
## [181] "287" "289" "290" "291" "294" "295" "296" "297" "298" "299"
## [191] "301" "302" "303" "304" "305" "435" "436" "437" "438" "439"
## [201] "440" "441" "442" "443" "444" "445" "446" "602" "603" "604"
## [211] "607" "609" "610" "623" "624" "625" "626" "627" "628" "629"
## [221] "630" "631" "632" "633" "666" "669" "670" "671" "672" "673"
## [231] "674" "675" "676" "677" "678" "679" "680" "681" "682" "683"
## [241] "765" "766" "769" "770" "771" "772" "773" "774" "775" "776"
## [251] "777" "778" "781" "782" "784" "785" "786" "787" "788" "789"
## [261] "790" "791" "792" "793" "794" "795" "835" "836" "837" "838"
## [271] "839" "840" "841" "842" "843" "844" "845" "846" "847" "848"
## [281] "877" "878" "879" "880" "881" "882" "883" "884" "885" "886"
## [291] "887" "888" "889" "890" "891" "892" "893" "894" "911" "912"
## [301] "913" "915" "964" "965" "966" "967" "972" "973" "975" "976"
## [311] "977" "978" "979" "980" "981" "982" "983" "984" "985" "986"
## [321] "987" "988" "989" "990" "991" "992" "993" "994" "995"
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save it
To load it
Save it
To load it
## [1] 329 45278
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1001 1.0423735 -1.0423735 0.8067209 -0.8067209
## 1002 0.0000000 0.0000000 -1.7420494 1.7420494
## 1003 -0.2605934 0.2605934 -1.7420494 1.7420494
## 1004 -0.2605934 0.2605934 -1.7420494 1.7420494
## 1005 -0.2605934 0.2605934 -0.4676643 0.4676643
## AX-583036983_C.T
## 1001 1.19822555
## 1002 -0.09459675
## 1003 1.19822555
## 1004 -0.09459675
## 1005 1.19822555
Save it
To load it
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp3, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 40, xval.plot = TRUE)
$n.pca: 40 first PCs of PCA used $n.da: 38 discriminant functions saved $var (proportion of conserved variance): 0.346
Run dapc using these #s from CV
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## BAR BEN BER CAM CHA GEL
## 0.600000000 0.845833333 0.658333333 0.491666667 0.241666667 0.000000000
## HAI HAN HOC HUN INJ INW
## 0.562500000 1.000000000 0.142857143 0.708333333 0.881818182 1.000000000
## JAF KAC KAG KAN KAT KLP
## 0.000000000 0.000000000 0.404166667 0.545454545 0.000000000 0.000000000
## KUN LAM MAT OKI PAL POL
## 0.000000000 0.000000000 0.516666667 0.679166667 0.527272727 0.000000000
## POP QNC SON SPB SPC SPM
## -0.062500000 -0.095454545 0.000000000 0.500000000 0.000000000 0.000000000
## SPS SSK SUF SUU TAI TUA
## 0.000000000 0.220833333 0.000000000 0.833333333 0.000000000 -0.005555556
## TUH UTS YUN
## 0.158333333 0.520833333 0.000000000
##
## $pop.score$`5`
## BAR BEN BER CAM CHA GEL
## 0.88333333 0.86250000 0.60833333 0.39166667 0.38333333 -0.10000000
## HAI HAN HOC HUN INJ INW
## 0.42083333 0.97500000 0.14285714 0.89166667 0.94090909 0.90000000
## JAF KAC KAG KAN KAT KLP
## -0.12500000 -0.04166667 0.93333333 0.55000000 -0.07500000 -0.07500000
## KUN LAM MAT OKI PAL POL
## 0.97500000 -0.07222222 0.58333333 0.55000000 0.90000000 -0.15000000
## POP QNC SON SPB SPC SPM
## 0.40416667 0.93181818 -0.06666667 0.70625000 0.08333333 -0.04000000
## SPS SSK SUF SUU TAI TUA
## 0.68750000 0.62083333 0.30833333 0.96666667 0.55000000 0.08333333
## TUH UTS YUN
## 0.45833333 0.87500000 -0.09444444
##
## $pop.score$`10`
## BAR BEN BER CAM CHA GEL
## 0.86250000 0.91250000 0.93750000 0.53750000 0.66250000 -0.22500000
## HAI HAN HOC HUN INJ INW
## 0.66250000 0.86250000 0.61428571 0.87500000 0.79545455 0.83750000
## JAF KAC KAG KAN KAT KLP
## -0.30000000 -0.02500000 0.91666667 0.66363636 -0.05000000 0.17500000
## KUN LAM MAT OKI PAL POL
## 0.85000000 -0.03888889 0.77083333 0.86666667 0.90454545 0.72500000
## POP QNC SON SPB SPC SPM
## 0.78333333 0.94090909 -0.23333333 0.81875000 0.60833333 -0.12000000
## SPS SSK SUF SUU TAI TUA
## 0.75625000 0.59583333 0.23333333 0.90000000 0.93571429 0.05555556
## TUH UTS YUN
## 0.50833333 0.87500000 -0.08888889
##
## $pop.score$`15`
## BAR BEN BER CAM CHA GEL HAI
## 0.9208333 0.9208333 0.9375000 0.6208333 0.5291667 0.6500000 0.6875000
## HAN HOC HUN INJ INW JAF KAC
## 0.8625000 0.7642857 0.8875000 0.9045455 0.8000000 -0.3000000 -0.0750000
## KAG KAN KAT KLP KUN LAM MAT
## 0.8958333 0.8227273 -0.1166667 0.5625000 0.7500000 0.1277778 0.6875000
## OKI PAL POL POP QNC SON SPB
## 0.8041667 0.9227273 0.6000000 0.7083333 0.9090909 -0.2000000 0.8062500
## SPC SPM SPS SSK SUF SUU TAI
## 0.5416667 -0.1900000 0.7750000 0.4666667 0.3916667 0.8750000 0.8285714
## TUA TUH UTS YUN
## 0.3611111 0.6750000 0.9291667 0.2888889
##
## $pop.score$`20`
## BAR BEN BER CAM CHA GEL
## 0.92083333 0.90000000 0.90416667 0.62916667 0.65833333 0.60000000
## HAI HAN HOC HUN INJ INW
## 0.57916667 0.76250000 0.85000000 0.89166667 0.83636364 0.72500000
## JAF KAC KAG KAN KAT KLP
## -0.40000000 -0.20833333 0.87500000 0.87272727 0.50000000 0.76250000
## KUN LAM MAT OKI PAL POL
## 0.81250000 0.66666667 0.77916667 0.90000000 0.92727273 0.52500000
## POP QNC SON SPB SPC SPM
## 0.71666667 0.89090909 -0.03333333 0.58125000 0.33333333 -0.15000000
## SPS SSK SUF SUU TAI TUA
## 0.87500000 0.32083333 0.65833333 0.81666667 0.79285714 0.56666667
## TUH UTS YUN
## 0.79583333 0.85416667 0.32777778
##
## $pop.score$`25`
## BAR BEN BER CAM CHA GEL
## 0.89583333 0.85000000 0.92083333 0.61666667 0.58750000 0.45000000
## HAI HAN HOC HUN INJ INW
## 0.64583333 0.71250000 0.87142857 0.85833333 0.90909091 0.66250000
## JAF KAC KAG KAN KAT KLP
## 0.00000000 -0.03333333 0.89583333 0.84090909 0.46666667 0.76250000
## KUN LAM MAT OKI PAL POL
## 0.78750000 0.75555556 0.70833333 0.83750000 0.86818182 0.35000000
## POP QNC SON SPB SPC SPM
## 0.70416667 0.86363636 0.23333333 0.61875000 0.28333333 -0.27000000
## SPS SSK SUF SUU TAI TUA
## 0.83125000 0.44583333 0.59166667 0.81666667 0.80000000 0.35555556
## TUH UTS YUN
## 0.68750000 0.87916667 0.20000000
##
## $pop.score$`30`
## BAR BEN BER CAM CHA GEL HAI
## 0.8291667 0.7166667 0.8541667 0.5958333 0.5500000 0.3500000 0.7833333
## HAN HOC HUN INJ INW JAF KAC
## 0.6125000 0.7571429 0.8000000 0.8090909 0.6625000 0.4500000 0.4333333
## KAG KAN KAT KLP KUN LAM MAT
## 0.8875000 0.8181818 0.4500000 0.6000000 0.6500000 0.5888889 0.7375000
## OKI PAL POL POP QNC SON SPB
## 0.9041667 0.8409091 0.4250000 0.7208333 0.8500000 0.2166667 0.5750000
## SPC SPM SPS SSK SUF SUU TAI
## 0.2166667 -0.2800000 0.8187500 0.3583333 0.6333333 0.7916667 0.8142857
## TUA TUH UTS YUN
## 0.5444444 0.6708333 0.8666667 0.4722222
##
## $pop.score$`35`
## BAR BEN BER CAM CHA GEL HAI
## 0.8583333 0.7708333 0.8333333 0.5041667 0.6708333 0.5750000 0.7750000
## HAN HOC HUN INJ INW JAF KAC
## 0.5500000 0.7571429 0.7833333 0.7954545 0.6875000 0.3250000 0.3416667
## KAG KAN KAT KLP KUN LAM MAT
## 0.8666667 0.8090909 0.4416667 0.5875000 0.5250000 0.6833333 0.6750000
## OKI PAL POL POP QNC SON SPB
## 0.8250000 0.8227273 0.3750000 0.6458333 0.8272727 0.1666667 0.4750000
## SPC SPM SPS SSK SUF SUU TAI
## 0.1916667 -0.1200000 0.8125000 0.2958333 0.5833333 0.6500000 0.6928571
## TUA TUH UTS YUN
## 0.3833333 0.7041667 0.8541667 0.5833333
##
## $pop.score$`40`
## BAR BEN BER CAM CHA GEL HAI
## 0.7833333 0.6458333 0.8000000 0.6166667 0.7250000 0.3500000 0.7500000
## HAN HOC HUN INJ INW JAF KAC
## 0.5000000 0.6714286 0.8416667 0.7409091 0.5750000 0.3750000 0.3500000
## KAG KAN KAT KLP KUN LAM MAT
## 0.7958333 0.7954545 0.3583333 0.5625000 0.5625000 0.5500000 0.5500000
## OKI PAL POL POP QNC SON SPB
## 0.8333333 0.8045455 0.2750000 0.6708333 0.7590909 0.1500000 0.5875000
## SPC SPM SPS SSK SUF SUU TAI
## 0.1416667 -0.2400000 0.6312500 0.2708333 0.5083333 0.6250000 0.6857143
## TUA TUH UTS YUN
## 0.3277778 0.7125000 0.7458333 0.5000000
##
##
## $mean
## 1 5 10 15 20 25 30 35
## 0.3045015 0.4545556 0.5477519 0.5982943 0.6311963 0.6220776 0.6250149 0.6047319
## 40
## 0.5612480
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
##
## $pred$y
## [1] 0.3045015 0.3457581 0.3855216 0.4222886 0.4545556 0.4812350 0.5029027
## [8] 0.5205503 0.5351694 0.5477519 0.5591570 0.5697155 0.5796256 0.5890857
## [15] 0.5982943 0.6073191 0.6157061 0.6228710 0.6282293 0.6311963 0.6314468
## [22] 0.6296908 0.6268978 0.6240370 0.6220776 0.6217111 0.6225186 0.6238029
## [29] 0.6248673 0.6250149 0.6236858 0.6208680 0.6166864 0.6112661 0.6047319
## [36] 0.5972212 0.5889195 0.5800248 0.5707350 0.5612480
##
##
## $best
## [1] 21
Run DAPC with object
Save it
saveRDS(
dapc_snp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc_snp.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "iberia_turkey_US", "dapc_snp.rds"
)
)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
PCs 1 & 2
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/iberia_turkey_US/dapc_snp_iberia_turkey_US_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 2 - colored by region
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/iberia_turkey_US/dapc_snp_iberia_turkey_US_region_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,3:4,6:7,10:14,17)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,3:4,6:7,10:14,17)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 3
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/iberia_turkey_US/dapc_snp_iberia_turkey_US_region_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,3:4,6:7,10:14,17)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,3:4,6:7,10:14,17)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
PCs 1 & 4
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/iberia_turkey_US/dapc_snp_iberia_turkey_US_region_PC1_4.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,3:4,6:7,10:14,17)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:25,35:38,3:4,6:7,10:14,17)
myCol2 <- c("#a113b2", "#146c45", "#66C2A5", "#2524f9", "#2524f9", "#146c45", "#c41A1C", "#2524f9", "#2524f9", "#c41A1C", "#2524f9", "#2524f9", "#146c45", "#2524f9", "#c41A1C", "#c41A1C", "#146c45", "#2524f9", "#146c45", "#2524f9", "#2524f9", "#c41A1C", "#66C2A5", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#2524f9", "#2524f9", "#2524f9", "#c41A1C", "goldenrod", "goldenrod", "#c41A1C", "#c41A1C")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
Using r2<0.01 LD pruning and MAF 1% (scaled and plotting additional PCs)
echo "FRS
STS
POP
POL
SPB
SPS
SPC
BAR
SPM
IMP
ITG
BRE
DES
TRE
ITB
CES
ROM
ITR
SIC
ITP
MAL
SLO
CRO
ALV
ALD
TIR
SER
GRA
GRC
ROS
BUL
TUA
TUH
SEV
ALU
KER
KRA
SOC
TIK
RAR
GES
ARM
" > euro_global/output/neuroadmixture/europe_all.txt
Create files
cd /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global
plink \
--allow-extra-chr \
--keep-allele-order \
--keep-fam output/neuroadmixture/europe_all.txt \
--bfile output/snps_sets/r2_0.01_b \
--make-bed \
--out output/dapc/MAF_1/dapc_europe_MAF01 \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_europe_MAF01.log
22642 variants loaded from .bim file. –keep-fam: 410 people remaining. Total genotyping rate in remaining samples is 0.970962. 22642 variants and 410 people pass filters and QC.
Convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/dapc/MAF_1/dapc_europe_MAF01 \
--recodeA \
--out output/dapc/MAF_1/dapc_europe_MAF01 \
--silent;
grep 'samples\|variants\|remaining' output/dapc/MAF_1/dapc_europe_MAF01.log
22642 variants loaded from .bim file. 22642 variants and 287 people pass filters and QC.
Clean env & memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 5971456 319.0 16401712 876.0 24799007 1324.5
## Vcells 10193610 77.8 177936365 1357.6 848466578 6473.3
Import the data and covert it to genind format
# import the data
snp <-
read.PLINK(
here("euro_global/output/dapc/MAF_1/dapc_europe_MAF01.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## [1] 410
## [1] 22642
## [1] 41
## [1] "1065" "1066" "1067" "1068" "1069" "1070" "1071" "1072" "1073" "1074"
## [11] "1075" "1076" "1077" "1078" "1079" "1080" "1081" "1082" "1083" "1084"
## [21] "1085" "1086" "1087" "1088" "1109" "1110" "1111" "1112" "1113" "1114"
## [31] "1115" "1116" "1117" "1118" "1119" "1120" "1121" "1122" "1123" "1124"
## [41] "1125" "1126" "1127" "1128" "1129" "1130" "1131" "1132" "1133" "1134"
## [51] "1135" "1136" "1137" "1138" "1139" "1140" "1141" "1142" "1143" "1144"
## [61] "1145" "1146" "1147" "1148" "1149" "1150" "1151" "1152" "1153" "1154"
## [71] "1155" "1156" "1157" "1158" "1159" "1160" "1185" "1186" "1187" "1188"
## [81] "1189" "1190" "1191" "1192" "1193" "1194" "1195" "1201" "1202" "1203"
## [91] "1204" "1205" "1206" "1207" "1208" "1209" "1210" "1211" "1212" "1213"
## [101] "1218" "1219" "1220" "1221" "1222" "1223" "1224" "1225" "1226" "1227"
## [111] "1228" "1229" "1230" "1232" "1233" "1234" "1237" "1287" "1288" "1289"
## [121] "1292" "1293" "1294" "1295" "1426" "1427" "1428" "1429" "1430" "1431"
## [131] "1432" "1433" "1434" "1435" "1436" "1437" "1438" "1439" "1440" "1441"
## [141] "1443" "1444" "1446" "1447" "1449" "1451" "1452" "1454" "1456" "1458"
## [151] "1460" "1461" "159" "162" "167" "169" "170" "171" "172" "173"
## [161] "193" "194" "195" "196" "197" "198" "199" "200" "201" "202"
## [171] "203" "204" "2187" "2188" "2189" "2191" "2192" "2193" "2194" "2195"
## [181] "2202" "2215" "2216" "2217" "2218" "279" "280" "281" "282" "283"
## [191] "284" "285" "286" "287" "289" "290" "291" "701" "702" "703"
## [201] "704" "705" "706" "707" "708" "709" "710" "711" "712" "713"
## [211] "714" "715" "716" "717" "718" "719" "720" "721" "722" "723"
## [221] "724" "725" "726" "727" "728" "729" "730" "731" "732" "733"
## [231] "735" "736" "737" "741" "742" "743" "744" "745" "746" "747"
## [241] "749" "750" "751" "752" "753" "754" "755" "756" "757" "758"
## [251] "759" "760" "761" "762" "763" "764" "765" "766" "769" "770"
## [261] "771" "772" "773" "774" "775" "776" "777" "778" "781" "782"
## [271] "784" "785" "786" "787" "788" "789" "790" "791" "792" "793"
## [281] "794" "795" "801" "802" "803" "804" "805" "806" "807" "808"
## [291] "809" "810" "811" "812" "813" "814" "815" "816" "817" "818"
## [301] "819" "820" "821" "822" "824" "825" "826" "827" "829" "830"
## [311] "831" "833" "834" "835" "836" "837" "838" "839" "840" "841"
## [321] "842" "843" "844" "845" "846" "847" "848" "849" "850" "851"
## [331] "852" "853" "854" "855" "856" "857" "859" "860" "861" "862"
## [341] "863" "864" "865" "866" "867" "868" "869" "870" "871" "872"
## [351] "873" "874" "875" "876" "877" "878" "879" "880" "881" "882"
## [361] "883" "884" "885" "886" "887" "888" "889" "890" "891" "892"
## [371] "893" "894" "901" "902" "903" "904" "905" "906" "907" "908"
## [381] "909" "910" "911" "912" "913" "915" "916" "917" "918" "919"
## [391] "920" "921" "922" "923" "924" "925" "926" "927" "928" "929"
## [401] "930" "931" "932" "933" "934" "935" "936" "937" "938" "939"
## Starting gl2gi
## Processing genlight object with SNP data
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## Matrix converted.. Prepare genind object...
## Completed: gl2gi
Save it
To load it
Save it
To load it
## [1] 410 45220
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1065 1.0694662 -1.0694662 0.07327617 -0.07327617
## 1066 1.0694662 -1.0694662 1.37950360 -1.37950360
## 1067 1.0694662 -1.0694662 1.37950360 -1.37950360
## 1068 -0.2365789 0.2365789 0.07327617 -0.07327617
## 1069 1.0694662 -1.0694662 1.37950360 -1.37950360
## AX-583036983_C.A
## 1065 0.1719037
## 1066 1.5336507
## 1067 1.5336507
## 1068 0.1719037
## 1069 1.5336507
Save it
To load it
Cross-validation: The Discriminant Analysis of Principal Components (DAPC) relies on dimension reduction of the data using PCA followed by a linear discriminant analysis. How many PCA axes to retain is often a non-trivial question. Cross validation provides an objective way to decide how many axes to retain: different numbers are tried and the quality of the corresponding DAPC is assessed by cross- validation: DAPC is performed on a training set, typically made of 90% of the observations (comprising 90% of the observations in each subpopulation) , and then used to predict the groups of the 10% of remaining observations. The current method uses the average prediction success per group (result=“groupMean”), or the overall prediction success (result=“overall”). The number of PCs associated with the lowest Mean Squared Error is then retained in the DAPC.
xvalDapc(snp3, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 30, xval.plot = TRUE)
$n.pca: 100 first PCs of PCA used $n.da: 40 discriminant functions saved $var (proportion of conserved variance): 0.518
Run dapc using these #s from CV
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc1.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc1.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## ALD ALU ALV ARM BAR BRE
## 0.000000000 0.833333333 0.008333333 0.075000000 0.229166667 0.284615385
## BUL CES CRO DES FRS GES
## -0.020000000 -0.132142857 -0.058333333 0.303125000 0.141666667 -0.004166667
## GRA GRC IMP ITB ITP ITR
## 0.795454545 0.000000000 0.000000000 0.000000000 -0.022222222 0.225000000
## KER KRA MAL POL POP RAR
## 0.500000000 0.083333333 -0.025000000 0.000000000 0.166666667 0.454166667
## ROM ROS SER SEV SIC SLO
## 0.000000000 -0.045454545 0.000000000 -0.020833333 -0.022222222 0.166666667
## SOC SPB SPC SPM SPS STS
## -0.008333333 -0.031250000 0.000000000 0.000000000 0.000000000 0.250000000
## TIK TIR TRE TUA TUH
## 0.570833333 0.000000000 0.445833333 0.000000000 -0.020833333
##
## $pop.score$`10`
## ALD ALU ALV ARM BAR BRE
## 0.75000000 0.62500000 0.41666667 0.80000000 0.85833333 0.76538462
## BUL CES CRO DES FRS GES
## 0.57500000 0.86071429 0.53750000 0.87812500 0.59583333 0.22500000
## GRA GRC IMP ITB ITP ITR
## 0.91363636 0.53000000 -0.08750000 0.15000000 0.68888889 0.72083333
## KER KRA MAL POL POP RAR
## 0.39166667 0.07916667 0.89166667 0.37500000 0.06250000 -0.08750000
## ROM ROS SER SEV SIC SLO
## 0.37500000 0.74090909 0.90000000 0.75000000 0.40555556 0.86666667
## SOC SPB SPC SPM SPS STS
## 0.26250000 0.83750000 0.58333333 -0.14000000 0.80000000 0.66666667
## TIK TIR TRE TUA TUH
## 0.66666667 0.10000000 0.48333333 0.23333333 0.42083333
##
## $pop.score$`20`
## ALD ALU ALV ARM BAR BRE
## 0.675000000 0.816666667 0.354166667 0.885000000 0.900000000 0.738461538
## BUL CES CRO DES FRS GES
## 0.665000000 0.871428571 0.625000000 0.815625000 0.700000000 0.525000000
## GRA GRC IMP ITB ITP ITR
## 0.868181818 0.640000000 0.250000000 0.530000000 0.855555556 0.687500000
## KER KRA MAL POL POP RAR
## 0.625000000 -0.004166667 0.712500000 0.475000000 0.500000000 0.341666667
## ROM ROS SER SEV SIC SLO
## 0.675000000 0.868181818 0.712500000 0.825000000 0.483333333 0.862500000
## SOC SPB SPC SPM SPS STS
## 0.354166667 0.656250000 0.300000000 -0.210000000 0.693750000 0.750000000
## TIK TIR TRE TUA TUH
## 0.508333333 0.287500000 0.604166667 0.083333333 0.625000000
##
## $pop.score$`30`
## ALD ALU ALV ARM BAR BRE
## 0.58000000 0.61666667 0.46250000 0.76000000 0.78333333 0.71923077
## BUL CES CRO DES FRS GES
## 0.65500000 0.83571429 0.55000000 0.78437500 0.64583333 0.46250000
## GRA GRC IMP ITB ITP ITR
## 0.77727273 0.54000000 0.01250000 0.42000000 0.68888889 0.55000000
## KER KRA MAL POL POP RAR
## 0.76250000 0.03333333 0.62083333 0.40000000 0.37500000 0.55000000
## ROM ROS SER SEV SIC SLO
## 0.55000000 0.78181818 0.51250000 0.80833333 0.41666667 0.79166667
## SOC SPB SPC SPM SPS STS
## 0.31666667 0.62500000 0.13333333 -0.38000000 0.63125000 0.72916667
## TIK TIR TRE TUA TUH
## 0.66250000 0.16250000 0.65833333 0.19444444 0.79583333
##
## $pop.score$`40`
## ALD ALU ALV ARM BAR BRE BUL
## 0.4750000 0.7083333 0.3041667 0.7350000 0.7416667 0.6307692 0.7200000
## CES CRO DES FRS GES GRA GRC
## 0.7714286 0.6500000 0.7468750 0.5333333 0.3083333 0.7227273 0.5450000
## IMP ITB ITP ITR KER KRA MAL
## -0.0750000 0.3000000 0.6277778 0.4958333 0.6583333 0.0750000 0.6708333
## POL POP RAR ROM ROS SER SEV
## 0.1000000 0.3916667 0.6750000 0.4375000 0.6909091 0.3625000 0.7291667
## SIC SLO SOC SPB SPC SPM SPS
## 0.3277778 0.7083333 0.2875000 0.5000000 0.0250000 -0.5200000 0.6312500
## STS TIK TIR TRE TUA TUH
## 0.6208333 0.5583333 -0.0750000 0.5541667 0.3166667 0.6666667
##
## $pop.score$`50`
## ALD ALU ALV ARM BAR BRE
## 0.40500000 0.69583333 0.31666667 0.65000000 0.67916667 0.65769231
## BUL CES CRO DES FRS GES
## 0.57000000 0.65000000 0.52500000 0.71875000 0.57083333 0.22083333
## GRA GRC IMP ITB ITP ITR
## 0.63181818 0.38000000 -0.20000000 0.17000000 0.55555556 0.56666667
## KER KRA MAL POL POP RAR
## 0.55000000 0.14166667 0.64166667 0.15000000 0.42916667 0.60416667
## ROM ROS SER SEV SIC SLO
## 0.32500000 0.65909091 0.36250000 0.65833333 0.36666667 0.62083333
## SOC SPB SPC SPM SPS STS
## 0.48750000 0.42500000 -0.01666667 -0.67000000 0.58125000 0.58750000
## TIK TIR TRE TUA TUH
## 0.57916667 0.08750000 0.54583333 0.12222222 0.63750000
##
## $pop.score$`60`
## ALD ALU ALV ARM BAR BRE
## 0.41500000 0.60000000 0.28333333 0.50500000 0.56250000 0.61538462
## BUL CES CRO DES FRS GES
## 0.59000000 0.62142857 0.59166667 0.66250000 0.57083333 0.25000000
## GRA GRC IMP ITB ITP ITR
## 0.57272727 0.35500000 -0.32500000 0.22000000 0.54444444 0.38750000
## KER KRA MAL POL POP RAR
## 0.48333333 0.26666667 0.58333333 0.00000000 0.40416667 0.49583333
## ROM ROS SER SEV SIC SLO
## 0.20000000 0.55909091 0.23750000 0.57083333 0.39444444 0.49166667
## SOC SPB SPC SPM SPS STS
## 0.51666667 0.31875000 0.06666667 -0.69000000 0.47500000 0.48750000
## TIK TIR TRE TUA TUH
## 0.56666667 0.06250000 0.57500000 0.01666667 0.47916667
##
## $pop.score$`70`
## ALD ALU ALV ARM BAR BRE
## 0.34000000 0.50416667 0.34583333 0.50000000 0.48750000 0.53076923
## BUL CES CRO DES FRS GES
## 0.47500000 0.53571429 0.47500000 0.55625000 0.50833333 0.16250000
## GRA GRC IMP ITB ITP ITR
## 0.42727273 0.22500000 -0.32500000 0.20000000 0.43333333 0.45416667
## KER KRA MAL POL POP RAR
## 0.47916667 0.27083333 0.42916667 0.02500000 0.42083333 0.54166667
## ROM ROS SER SEV SIC SLO
## 0.21250000 0.47272727 0.05000000 0.50416667 0.23333333 0.55416667
## SOC SPB SPC SPM SPS STS
## 0.42500000 0.27500000 -0.04166667 -0.80000000 0.34375000 0.50833333
## TIK TIR TRE TUA TUH
## 0.53333333 0.20000000 0.47916667 0.10000000 0.37916667
##
## $pop.score$`80`
## ALD ALU ALV ARM BAR BRE
## 0.25500000 0.44583333 0.24166667 0.40000000 0.47500000 0.44230769
## BUL CES CRO DES FRS GES
## 0.31500000 0.50714286 0.42916667 0.53125000 0.42083333 0.31666667
## GRA GRC IMP ITB ITP ITR
## 0.38636364 0.29000000 -0.17500000 0.11000000 0.26111111 0.20833333
## KER KRA MAL POL POP RAR
## 0.31666667 0.20416667 0.32916667 0.00000000 0.37083333 0.43333333
## ROM ROS SER SEV SIC SLO
## 0.11250000 0.38181818 0.10000000 0.43750000 0.26666667 0.43750000
## SOC SPB SPC SPM SPS STS
## 0.32083333 0.11875000 -0.13333333 -0.70000000 0.27500000 0.50833333
## TIK TIR TRE TUA TUH
## 0.40000000 0.10000000 0.45000000 0.02777778 0.31250000
##
## $pop.score$`90`
## ALD ALU ALV ARM BAR BRE
## 0.19500000 0.40000000 0.22500000 0.31000000 0.34166667 0.42692308
## BUL CES CRO DES FRS GES
## 0.33500000 0.39642857 0.35000000 0.44375000 0.37916667 0.17916667
## GRA GRC IMP ITB ITP ITR
## 0.28636364 0.21500000 -0.21250000 0.11000000 0.27777778 0.24583333
## KER KRA MAL POL POP RAR
## 0.33750000 0.13750000 0.27083333 0.00000000 0.27916667 0.34583333
## ROM ROS SER SEV SIC SLO
## 0.06250000 0.36363636 0.05000000 0.37916667 0.24444444 0.34166667
## SOC SPB SPC SPM SPS STS
## 0.39583333 0.05000000 -0.20833333 -0.73000000 0.09375000 0.30416667
## TIK TIR TRE TUA TUH
## 0.34583333 0.05000000 0.31250000 -0.06666667 0.29166667
##
## $pop.score$`100`
## ALD ALU ALV ARM BAR BRE
## 0.07000000 0.28750000 0.15833333 0.22000000 0.24166667 0.35000000
## BUL CES CRO DES FRS GES
## 0.19500000 0.31071429 0.32500000 0.38125000 0.30416667 0.07083333
## GRA GRC IMP ITB ITP ITR
## 0.33181818 0.15500000 -0.21250000 0.05000000 0.20555556 0.17083333
## KER KRA MAL POL POP RAR
## 0.34166667 0.13333333 0.30416667 0.02500000 0.14166667 0.25833333
## ROM ROS SER SEV SIC SLO
## 0.06250000 0.26363636 0.01250000 0.31666667 0.18333333 0.31250000
## SOC SPB SPC SPM SPS STS
## 0.29166667 0.05625000 -0.10000000 -0.73000000 0.03125000 0.22500000
## TIK TIR TRE TUA TUH
## 0.33333333 0.03750000 0.34166667 -0.03333333 0.21666667
##
##
## $mean
## 1 10 20 30 40 50 60 70
## 0.1249367 0.5236150 0.5885756 0.5359877 0.4715532 0.4302369 0.3800920 0.3275972
## 80 90 100
## 0.2666021 0.2086725 0.1619628
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100
##
## $pred$y
## [1] 0.1249367 0.1779653 0.2303467 0.2814228 0.3305355 0.3770268 0.4202384
## [8] 0.4595124 0.4941906 0.5236150 0.5473318 0.5657054 0.5793043 0.5886973
## [15] 0.5944530 0.5971401 0.5973272 0.5955831 0.5924763 0.5885756 0.5843546
## [22] 0.5799069 0.5752310 0.5703254 0.5651887 0.5598195 0.5542162 0.5483774
## [29] 0.5423018 0.5359877 0.5294459 0.5227356 0.5159282 0.5090951 0.5023076
## [36] 0.4956372 0.4891551 0.4829329 0.4770418 0.4715532 0.4665143 0.4618750
## [43] 0.4575611 0.4534983 0.4496124 0.4458291 0.4420742 0.4382733 0.4343523
## [50] 0.4302369 0.4258708 0.4212700 0.4164685 0.4115003 0.4063992 0.4011994
## [57] 0.3959348 0.3906394 0.3853471 0.3800920 0.3748985 0.3697533 0.3646337
## [64] 0.3595168 0.3543799 0.3492002 0.3439548 0.3386210 0.3331761 0.3275972
## [71] 0.3218686 0.3160036 0.3100223 0.3039451 0.2977922 0.2915839 0.2853405
## [78] 0.2790822 0.2728293 0.2666021 0.2604194 0.2542936 0.2482361 0.2422577
## [85] 0.2363697 0.2305831 0.2249089 0.2193584 0.2139426 0.2086725 0.2035549
## [92] 0.1985784 0.1937273 0.1889860 0.1843387 0.1797698 0.1752634 0.1708039
## [99] 0.1663756 0.1619628
##
##
## $best
## [1] 17
Run DAPC with object
Save it
saveRDS(
dapc_snp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc_snp.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc_snp.rds"
)
)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
PCs 1 & 2
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_europe_all_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 3
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_europe_all_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
PCs 1 & 4
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_europe_all_PC1_4.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
PCs 1 & 2
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_europe_all_PC1_2_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
PCs 1 & 3
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_europe_all_PC1_3_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
PCs 1 & 4
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_europe_all_PC1_4_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
PCs 1 & 5
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_europe_all_PC1_5_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=5)
dev.off()
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=5)
PCs 1 & 6
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_europe_all_PC1_6_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=6)
dev.off()
good.shapes = c(1:20,2,35:38,1,3,5:8,9:14,19:20,6,11)
myCol2 <- c("#a113b2", "goldenrod", "#a113b2", "goldenrod", "#a113b2", "magenta", "goldenrod", "magenta", "#a113b2", "magenta", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "goldenrod", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "#a113b2", "#a113b2", "#a113b2", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=6)
Import Sample Locations
sampling_loc <- read.csv(
here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sampling_loc_europe_all.csv"))
saveRDS(sampling_loc, here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sampling_loc_europe_all.rds"))
sampling_loc <- readRDS(here("scripts/RMarkdowns/output/euro_global/dapc/MAF_1/sampling_loc_europe_all.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Vlore Albania 40.46600 19.48970 Europe ALV 2020
## 2 Durres Albania 41.29704 19.50373 Europe ALD 2018
## 3 Tirana Albania 41.31473 19.83172 Europe TIR 2017
## 4 Ijevan Armenia 40.87971 45.14764 Europe ARM 2020
## 5 Lom Bulgaria 43.80489 23.23634 Europe BUL 2019
## 6 Dubrovnik Croatia 42.60654 18.22661 Europe CRO 2017
## Region Subregion order order2 orderold
## 1 Southern Europe East Europe 32 24 24
## 2 Southern Europe East Europe 33 25 25
## 3 Southern Europe East Europe 34 26 26
## 4 Eastern Europe East Europe 50 42 42
## 5 Eastern Europe East Europe 39 31 31
## 6 Southern Europe East Europe 31 23 23
## [1] SOC SOC SOC SOC SOC SOC
## 41 Levels: ALD ALU ALV ARM BAR BRE BUL CES CRO DES FRS GES GRA GRC IMP ... TUH
Load the csv
countr <- read.csv(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "DAPC_europe_all.csv"
))
df <- as.data.frame(countr)
head(df)
## pop country
## 1 SOC Russia
## 2 SOC Russia
## 3 SOC Russia
## 4 SOC Russia
## 5 SOC Russia
## 6 SOC Russia
## [1] Russia Russia Russia Russia Russia
## [6] Russia Russia Russia Russia Russia
## [11] Russia Russia Ukraine Ukraine Ukraine
## [16] Ukraine Ukraine Ukraine Ukraine Ukraine
## [21] Ukraine Ukraine Ukraine Ukraine Georgia
## [26] Georgia Georgia Georgia Georgia Georgia
## [31] Georgia Georgia Georgia Georgia Georgia
## [36] Georgia Ukraine Ukraine Ukraine Ukraine
## [41] Ukraine Ukraine Ukraine Ukraine Ukraine
## [46] Ukraine Ukraine Ukraine Russia Russia
## [51] Russia Russia Russia Russia Russia
## [56] Russia Russia Russia Russia Russia
## [61] Russia Russia Russia Russia Russia
## [66] Russia Russia Russia Russia Russia
## [71] Russia Russia Russia Russia Russia
## [76] Russia Italy Italy Italy Italy
## [81] Italy Italy Italy Italy Italy
## [86] Italy Italy Italy Ukraine Ukraine
## [91] Ukraine Ukraine Ukraine Ukraine Ukraine
## [96] Ukraine Ukraine Ukraine Ukraine Ukraine
## [101] France France France France France
## [106] France France France Italy Italy
## [111] Italy Italy Italy Italy Italy
## [116] Italy Italy Italy (1995) Italy (1995) Italy (1995)
## [121] Italy (1995) Italy (1995) Italy (1995) Italy (1995) France
## [126] France France France Italy (1995) Italy (1995)
## [131] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [136] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [141] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [146] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [151] Italy (1995) Italy (1995) Russia Russia Russia
## [156] Russia Russia Russia Russia Russia
## [161] Albania Albania Albania Albania Italy
## [166] Italy Italy Italy Italy Italy
## [171] Italy Italy Italy (1995) Italy (1995) Italy (1995)
## [176] Italy (1995) Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [181] Greece Italy (1995) Italy (1995) Italy (1995) Italy (1995)
## [186] Spain Spain Spain Spain Spain
## [191] Spain Spain Spain Spain Spain
## [196] Spain Spain Bulgaria Bulgaria Bulgaria
## [201] Bulgaria Bulgaria Bulgaria Bulgaria Bulgaria
## [206] Bulgaria Bulgaria Croatia Croatia Croatia
## [211] Croatia Croatia Croatia Croatia Croatia
## [216] Croatia Croatia Croatia Croatia Greece
## [221] Greece Greece Greece Greece Greece
## [226] Greece Greece Greece Greece Greece
## [231] Greece Greece Greece Greece Greece
## [236] Greece Greece Greece Greece Italy
## [241] Italy Italy Italy Italy Malta
## [246] Malta Malta Malta Malta Malta
## [251] Malta Malta Malta Malta Malta
## [256] Malta Spain Spain Spain Spain
## [261] Spain Turkey Turkey Turkey Turkey
## [266] Turkey Turkey Turkey Turkey Turkey
## [271] Turkey Turkey Turkey Turkey Turkey
## [276] Turkey Turkey Turkey Turkey Turkey
## [281] Turkey Turkey Albania Albania Albania
## [286] Albania Albania Albania Albania Albania
## [291] Albania Albania France France France
## [296] France France France France France
## [301] France France France France Italy
## [306] Italy Italy Italy Italy Italy
## [311] Italy Italy Italy Portugal Portugal
## [316] Portugal Portugal Portugal Portugal Portugal
## [321] Portugal Portugal Portugal Portugal Portugal
## [326] Portugal Portugal Romania Romania Romania
## [331] Romania Romania Romania Romania Romania
## [336] Romania Romania Romania Serbia Serbia
## [341] Serbia Serbia Slovenia Slovenia Slovenia
## [346] Slovenia Slovenia Slovenia Slovenia Slovenia
## [351] Slovenia Slovenia Slovenia Slovenia Spain
## [356] Spain Spain Spain Spain Spain
## [361] Spain Spain Spain Spain Spain
## [366] Spain Spain Spain Spain Spain
## [371] Spain Spain Armenia Armenia Armenia
## [376] Armenia Armenia Armenia Armenia Armenia
## [381] Armenia Armenia Spain Spain Spain
## [386] Spain Albania Albania Albania Albania
## [391] Albania Albania Albania Albania Albania
## [396] Albania Albania Albania Italy Italy
## [401] Italy Italy Italy Italy Italy
## [406] Italy Italy Italy Italy Italy
## 18 Levels: Albania Armenia Bulgaria Croatia France Georgia Greece ... Ukraine
Save the genind object
saveRDS(snp2, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "snp_country.rds"
))
Load the genind object
snp_country <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "snp_country.rds"
))
Scale
## Warning in .local(x, ...): Some scaling values are null.
## Corresponding alleles are removed.
## [1] "matrix" "array"
## [1] 410 45220
## AX-583033370_G.G AX-583033370_G.C AX-583034838_T.A AX-583034838_T.T
## 1065 1.0694662 -1.0694662 0.07327617 -0.07327617
## 1066 1.0694662 -1.0694662 1.37950360 -1.37950360
## 1067 1.0694662 -1.0694662 1.37950360 -1.37950360
## 1068 -0.2365789 0.2365789 0.07327617 -0.07327617
## 1069 1.0694662 -1.0694662 1.37950360 -1.37950360
## AX-583036983_C.A
## 1065 0.1719037
## 1066 1.5336507
## 1067 1.5336507
## 1068 0.1719037
## 1069 1.5336507
grp <- find.clusters(snp_country, max.n.clust=12)
#retained 350
#Choose the number of clusters (>=2): 5
Save the genind object
saveRDS(grp, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "grp_countries.rds"
))
Load the genind object
grp <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "grp_countries.rds"
))
##
## 1 2 3 4 5
## Albania 0 26 0 0 0
## Armenia 0 0 10 0 0
## Bulgaria 10 0 0 0 0
## Croatia 0 12 0 0 0
## France 24 0 0 0 0
## Georgia 0 0 12 0 0
## Greece 0 21 0 0 0
## Italy 55 0 0 0 0
## Italy (1995) 0 0 0 0 43
## Malta 12 0 0 0 0
## Portugal 14 0 0 0 0
## Romania 11 0 0 0 0
## Russia 0 0 48 0 0
## Serbia 4 0 0 0 0
## Slovenia 12 0 0 0 0
## Spain 0 0 0 22 17
## Turkey 3 0 0 18 0
## Ukraine 0 0 36 0 0
Cross-validation
xvalDapc(snp_country, populations, n.pca.max = 200, n.da = NULL,
training.set = 0.9, result = c("groupMean", "overall"),
center = TRUE, scale = FALSE,
n.pca=NULL, n.rep = 30, xval.plot = TRUE)
$n.pca: 120 first PCs of PCA used $n.da: 16 discriminant functions saved $var (proportion of conserved variance): 0.571
Run dapc using these #s from CV
Save the genind object
saveRDS(dapc1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc1_country.rds"
))
Load the genind object
dapc1 <- readRDS(here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc1_country.rds"
))
Calculating the optimum PC number to rerun DAPC
optim.a.score(dapc1, n.pca=1:ncol(dapc1$tab), smart=TRUE, n=10, plot=TRUE, n.sim=20) #calculating optimal number of PCs
## $pop.score
## $pop.score$`1`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.00000000 0.60000000 0.00000000 0.00000000 0.00000000 0.00000000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.23809524 0.19727273 0.88372093 0.00000000 0.00000000 0.00000000
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.70937500 0.00000000 0.00000000 -0.02692308 0.00000000 0.22083333
##
## $pop.score$`10`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.7942308 0.8400000 0.5300000 0.5916667 0.7145833 0.1750000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.8880952 0.1345455 0.8174419 0.2666667 0.2750000 0.2045455
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.3687500 0.8375000 0.6000000 0.3820513 0.7166667 0.4222222
##
## $pop.score$`20`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.8115385 0.8200000 0.6200000 0.5708333 0.8604167 0.4166667
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.9190476 0.3890909 0.7581395 0.4291667 0.7142857 0.8772727
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.3864583 0.6500000 0.8458333 0.7115385 0.8333333 0.4472222
##
## $pop.score$`30`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.7153846 0.7200000 0.6200000 0.5500000 0.7354167 0.4250000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.8214286 0.5163636 0.7127907 0.4541667 0.5535714 0.7772727
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.4531250 0.4500000 0.7833333 0.6833333 0.7761905 0.5069444
##
## $pop.score$`40`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.6480769 0.6500000 0.6650000 0.5791667 0.6645833 0.2541667
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.7380952 0.5545455 0.7151163 0.5541667 0.5500000 0.6363636
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.4739583 0.4750000 0.6500000 0.6038462 0.6976190 0.4750000
##
## $pop.score$`50`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.6115385 0.6050000 0.4900000 0.5541667 0.6625000 0.3416667
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.7142857 0.5863636 0.6779070 0.4291667 0.3642857 0.5772727
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.4625000 0.3375000 0.6416667 0.6217949 0.5333333 0.6208333
##
## $pop.score$`60`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.6442308 0.4600000 0.4950000 0.5125000 0.5562500 0.2500000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.6142857 0.5772727 0.6151163 0.3125000 0.4357143 0.4681818
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.4322917 0.1750000 0.5583333 0.5576923 0.4952381 0.6069444
##
## $pop.score$`70`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.6384615 0.4100000 0.4250000 0.5041667 0.5229167 0.3958333
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.5976190 0.5136364 0.5988372 0.2583333 0.3678571 0.4045455
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.4406250 0.1125000 0.4500000 0.5217949 0.4357143 0.6027778
##
## $pop.score$`80`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.5673077 0.3350000 0.3150000 0.3833333 0.5000000 0.0500000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.5190476 0.5463636 0.5360465 0.1125000 0.5000000 0.3363636
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.3916667 0.1000000 0.3416667 0.4935897 0.4333333 0.5513889
##
## $pop.score$`90`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.4865385 0.2600000 0.2300000 0.2541667 0.4104167 0.0875000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.4500000 0.4954545 0.4895349 0.1208333 0.3535714 0.2409091
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.4135417 0.0500000 0.3791667 0.4128205 0.3571429 0.5305556
##
## $pop.score$`100`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.43076923 0.14500000 0.21500000 0.27500000 0.43541667 -0.07083333
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.39047619 0.46363636 0.45930233 0.09583333 0.37857143 0.24090909
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.34062500 0.01250000 0.24583333 0.35000000 0.27619048 0.48888889
##
## $pop.score$`110`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.3653846 0.1750000 0.1500000 0.2291667 0.3770833 -0.1500000
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.3714286 0.4563636 0.4430233 0.1541667 0.2500000 0.1727273
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.3375000 0.0375000 0.2208333 0.3358974 0.2404762 0.4430556
##
## $pop.score$`120`
## Albania Armenia Bulgaria Croatia France Georgia
## 0.3230769 0.1400000 0.1300000 0.2250000 0.3541667 -0.1458333
## Greece Italy Italy (1995) Malta Portugal Romania
## 0.2952381 0.4218182 0.4058140 0.1041667 0.2392857 0.1409091
## Russia Serbia Slovenia Spain Turkey Ukraine
## 0.2500000 0.0250000 0.1541667 0.3217949 0.2071429 0.3736111
##
##
## $mean
## 1 10 20 30 40 50 60 70
## 0.1567986 0.5310536 0.6700469 0.6252401 0.5880391 0.5462101 0.4870306 0.4555899
## 80 90 100 110 120
## 0.3895893 0.3345640 0.2873955 0.2560893 0.2202976
##
## $pred
## $pred$x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120
##
## $pred$y
## [1] 0.1567986 0.2040849 0.2509478 0.2969578 0.3416853 0.3847008 0.4255749
## [8] 0.4638781 0.4991808 0.5310536 0.5591705 0.5836188 0.6045895 0.6222735
## [15] 0.6368617 0.6485450 0.6575143 0.6639604 0.6680743 0.6700469 0.6700803
## [22] 0.6684216 0.6653292 0.6610614 0.6558766 0.6500332 0.6437894 0.6374038
## [29] 0.6311345 0.6252401 0.6199231 0.6151633 0.6108849 0.6070119 0.6034686
## [36] 0.6001789 0.5970670 0.5940570 0.5910730 0.5880391 0.5848875 0.5815818
## [43] 0.5780940 0.5743958 0.5704591 0.5662556 0.5617571 0.5569355 0.5517626
## [50] 0.5462101 0.5402733 0.5340410 0.5276258 0.5211398 0.5146955 0.5084051
## [57] 0.5023811 0.4967358 0.4915815 0.4870306 0.4831516 0.4798377 0.4769381
## [64] 0.4743024 0.4717797 0.4692195 0.4664710 0.4633837 0.4598069 0.4555899
## [71] 0.4506266 0.4449890 0.4387934 0.4321565 0.4251947 0.4180245 0.4107624
## [78] 0.4035248 0.3964283 0.3895893 0.3830979 0.3769385 0.3710689 0.3654471
## [85] 0.3600310 0.3547786 0.3496477 0.3445964 0.3395825 0.3345640 0.3295113
## [92] 0.3244450 0.3193982 0.3144039 0.3094953 0.3047055 0.3000675 0.2956144
## [99] 0.2913794 0.2873955 0.2836833 0.2802130 0.2769424 0.2738293 0.2708313
## [106] 0.2679061 0.2650116 0.2621055 0.2591455 0.2560893 0.2529040 0.2495941
## [113] 0.2461736 0.2426563 0.2390561 0.2353868 0.2316625 0.2278969 0.2241040
## [120] 0.2202976
##
##
## $best
## [1] 21
Run DAPC with object
Save it
saveRDS(
dapc_snp1, here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc_snp_country.rds"
)
)
To load it
dapc_snp <- readRDS(
here("scripts", "RMarkdowns",
"output", "euro_global", "dapc", "MAF_1", "europe_all", "dapc_snp_country.rds"
)
)
myCol <- c("#52ef99", "#146c45", "#75d5e1", "#FB8072", "#2c4a5e", "#6a8fe0", "#8c61cd", "#f365e7", "#871550", "#a113b2", "#BF5B17", "#1F78B4", "#cf749b", "#FF7F00","#2524f9", "#799d10", "#a7e831", "#984EA3", "#754819", "#fda547", "#a41415", "#fd5917", "#fd4e8b", "#ead624", "#6A3D9A", "#21a708", "#332288", "#51f310", "#9d8d88", "#66C2A5", "#E41A1C", "#BC80BD", "#E7297A", "darkgray", "orange", "aquamarine3", "magenta", "gold4", "purple")
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_2.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_3.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_4.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:25,35:38,1:25)
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,35:38)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
## Warning: Removed 698 rows containing missing values or values outside the scale range
## (`geom_point()`).
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_2_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:20,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
dev.off()
good.shapes = c(1:20,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=2)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_3_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
dev.off()
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=3)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_4_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
dev.off()
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=4)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_5_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=5)
dev.off()
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=5)
pdf(file = "scripts/RMarkdowns/output/euro_global/dapc/MAF_1/europe_all/dapc_snp_country_euro_all_PC1_6_region.pdf", # The directory you want to save the file in
width = 7, # The width of the plot in inches
height = 7) # The height of the plot in inches
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=6)
dev.off()
good.shapes = c(1:25,35:38,1:25)
myCol2 <- c("#a113b2", "goldenrod", "goldenrod", "#a113b2", "#FF7F00", "goldenrod", "#a113b2", "#a113b2", "magenta", "#a113b2", "#a113b2", "#a113b2", "goldenrod", "goldenrod", "#a113b2", "#a113b2", "goldenrod", "goldenrod")
op <- par(cex = 0.39)
scatter(dapc_snp, pch = good.shapes, cstar = 0, col=myCol2, label=NULL, mstree = FALSE, legend=TRUE, posi.da="bottomleft", cex=1.0, cex.lab=0.5, cex.main=0.5, cellipse=TRUE, posi.leg="topleft", xax =1, yax=6)