Load LEA Libraries

library(LEA)
library(vcfR)
library(RColorBrewer)
library(adegenet)
#library(ape)
library(tidyverse)
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
library(here)
library(dplyr)
library(ggplot2)
library(colorout)
library(extrafont)
#library(scales)
library(stringr)
library(ggtext)

1. LEA for r<0.1 (LD) dataset for Europe_global

1.1 Check and import the data

Check data - we created vcf files with LD pruning r2<0.01 (SNP Set 1) and r2<0.1 (SNP Set 2) and r2<0.01 and MAF>1% (SNP Set 3) after QC

ls euro_global/output/snps_sets/*.vcf
## euro_global/output/snps_sets/neutral.vcf
## euro_global/output/snps_sets/r2_0.01.vcf
## euro_global/output/snps_sets/r2_0.01_b.vcf
## euro_global/output/snps_sets/r2_0.1.vcf
## euro_global/output/snps_sets/r2_0.1_b.vcf

Import the data for SNP Set 2 (r2<0.1)

genotype <- here(
   "euro_global/output/snps_sets/r2_0.1.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 56384
##   column count: 697
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 56384
##   Character matrix gt cols: 697
##   skip: 0
##   nrows: 56384
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant: 56384
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS GEL GES GRA GRC GRV 
##  10  12  12  10  12  12  12  13  10  12  14  12  12  16  12   2  12  11  10  12 
## HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG KAN KAT KER KLP KRA KUN LAM 
##  12   4   7  12   4  11   4   5   9  12   2   6  12  11   6  12   4  12   4   9 
## MAL MAT OKI PAL POL POP QNC RAR REC ROM ROS SER SEV SIC SLO SOC SON SPB SPC SPM 
##  12  12  12  11   2  12  11  12  11   4  11   4  12   9  12  12   3   8   6   5 
## SPS SSK STS SUF SUU TAI TIK TIR TRE TUA TUH UTS YUN 
##   8  12  12   6   6   7  12   4  12   9  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   688
##  - number of detected loci:      56384
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   688
##  - number of detected loci:      56384
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.removed file, for more informations.
## 
## 
##  - number of detected individuals:   688
##  - number of detected loci:      56384
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.lfmm"

PCA for SNP Set 2 (r2<0.1)

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          688
##         -L (number of loci)                 56384
##         -K (number of principal components) 688
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/ 
## pca result directory:            r2_0.1.pca/ 
## input file:                      r2_0.1.lfmm 
## eigenvalue file:                 r2_0.1.eigenvalues 
## eigenvector file:                r2_0.1.eigenvectors 
## standard deviation file:         r2_0.1.sdev 
## projection file:                 r2_0.1.projections 
## pcaProject file:                   r2_0.1.pcaProject 
## number of individuals:           688 
## number of loci:                  56384 
## number of principal components:  688 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          688
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Check R symbols for plot

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
  scale_shape_manual(values=good.shapes[1:N]) +
  geom_point()

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "sampling_loc_all.csv"
    ))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", 
  "output", "sampling_loc_all.rds"
))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_all.rds"))
head(sampling_loc)
##       Pop_City    Country  Latitude Longitude Continent Abbreviation
## 1  Franceville      Gabon  -1.59207  13.53242    Africa          GAB
## 2 Antananarivo Madagascar -18.87920  47.50790    Africa          ANT
## 3  Diego ville Madagascar -12.27361  49.29372    Africa          DGV
## 4    Morondava Madagascar -20.28420  44.27940    Africa          MAD
## 5     Vohimasy Madagascar -22.81591  47.75026    Africa          VOH
## 6      Dauguet  Mauritius -20.18530  57.52154    Africa          DAU
##          Year         Region    Subregion order
## 1        2015 Central Africa       Africa    72
## 2        2022    East Africa  East Africa    76
## 3        2022    East Africa  East Africa    77
## 4        2016    East Africa  East Africa    78
## 5 2016 & 2017    East Africa  East Africa    79
## 6        2022   Indian Ocean Indian Ocean    80

More sample data

# import sample attributes
samples2 <- read.delim(
  here("scripts", "RMarkdowns", 
    "output", "Population_meta_data.txt"
  ),
  head = TRUE
)

samples2<- samples2 |>
  dplyr::select(
    region, pop
  )

# check head of the file
head(samples2)
##           region pop
## 1 Central Africa GAB
## 2    East Africa ANT
## 3    East Africa MAD
## 4    East Africa DGV
## 5    East Africa VOH
## 6   Indian Ocean DAU

Merge with sampling_loc

merged_loc <- merge(samples2, sampling_loc, by.x = "pop", by.y = "Abbreviation")
head(merged_loc)
##   pop          region           Pop_City    Country  Latitude Longitude
## 1 AIZ       East Asia Aizuwakamatsu City      Japan  37.49240 139.99360
## 2 ALD Southern Europe             Durres    Albania  41.29704  19.50373
## 3 ALU  Eastern Europe            Alushta    Ukraine  44.68289  34.40368
## 4 ALV Southern Europe              Vlore    Albania  40.46600  19.48970
## 5 ANT     East Africa       Antananarivo Madagascar -18.87920  47.50790
## 6 ARM  Eastern Europe             Ijevan    Armenia  40.87971  45.14764
##   Continent Year          Region   Subregion order
## 1      Asia 2008       East Asia                NA
## 2    Europe 2018 Southern Europe East Europe    25
## 3    Europe 2021  Eastern Europe East Europe    35
## 4    Europe 2020 Southern Europe East Europe    24
## 5    Africa 2022     East Africa East Africa    76
## 6    Europe 2020  Eastern Europe East Europe    42

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 73

Check the regions

unique(merged_loc$region)
##  [1] "East Asia"       "Southern Europe" "Eastern Europe"  "East Africa"    
##  [5] "West Africa"     "North America"   "South Asia"      "Caribbean"      
##  [9] "Southeast Asia"  "Indian Ocean"    "Western Europe"  "Central Africa" 
## [13] "South America"   "North Africa"

Merge

merged_data <- merge(pc.coord, merged_loc, by.x = "Population", by.y = "pop")
head(merged_data)
##   Population     PC1     PC2     PC3     PC4      PC5       PC6       PC7
## 1        ALD 15.2256 6.46126 32.1280 3.23808 -49.7953 -10.92670 -0.572034
## 2        ALD 13.7685 5.90307 35.0021 3.82902 -57.4622 -10.96090  2.392940
## 3        ALD 14.1156 5.15973 41.4119 4.86405 -44.7047 -13.55490 -1.567570
## 4        ALD 14.0351 4.63880 37.2412 2.50662 -51.3634  -9.20733  6.339370
## 5        ALD 13.7638 7.77443 29.0741 1.91275 -46.4292  -7.44999  6.612820
## 6        ALD 11.7649 4.68709 36.5414 2.42992 -53.2824 -15.04960  4.084150
##         PC8      PC9    PC10      PC11       PC12     PC13      PC14       PC15
## 1 -14.30210 -2.22894 3.28104 -0.260834  -6.495890  1.12102  1.481720 -1.4254100
## 2 -17.34370 -4.73847 2.72773 -4.790710  -6.313720 10.88700  1.135290 -1.9179500
## 3 -21.48710 -5.15265 3.90904 -5.802810 -11.831500  6.07002 -0.741001  2.3593900
## 4 -12.26110 -7.07453 3.17415  0.719609   4.085840  2.68096  1.818100 -1.0343100
## 5  -7.38579 -4.23032 3.51418 -2.237670   0.521779 -4.26046  1.549950 -0.0944897
## 6 -12.77440 -1.34912 3.70964 -4.860350  -8.905270  4.72987 -1.311110 -4.1860700
##        PC16       PC17    PC18      PC19     PC20     PC21        PC22
## 1 -3.404840  -6.681380 2.26284 -3.934980  9.94505 -2.59977  -8.6026000
## 2  0.850241 -11.734100 4.24561 -3.817430 11.08090 -8.02989  -4.9901200
## 3 -3.358110  -9.281240 4.13695 -6.140290  3.20650 -7.80397   0.0856546
## 4  0.848314   0.500187 1.92693 -0.991450 -4.81889  1.79756  -0.2692420
## 5 -3.502980 -10.684700 1.66666 -0.583166 12.22450  1.32066 -11.0036000
## 6 -0.362751  -7.160270 2.79407 -1.804800  6.41620 -4.73082  -6.8650400
##        PC23      PC24      PC25      PC26      PC27      PC28      PC29
## 1 -0.393056  0.142389 -0.508575  -9.88046 -17.25000   4.67932  0.258118
## 2 -4.072270  3.701780 -1.030180  -9.99636 -13.44080   2.03050 -3.606230
## 3 -0.983792  1.240310 -1.573270 -10.22980 -16.59200   4.56860 -6.373030
## 4  0.865494  3.112280  3.126080  -8.86689   2.55136   7.30868  0.245592
## 5  0.793089 -0.259153 -8.438140  -6.71741 -14.32400 -24.81810 -4.207450
## 6 -0.533011  1.521040 -5.311990 -13.53110 -10.86650   6.41190  7.641670
##       PC30     PC31      PC32      PC33      PC34      PC35     PC36     PC37
## 1 6.032530 12.34690 -1.306060  -3.39428 -8.958020 -1.565250 -6.81837 13.14430
## 2 3.346470 12.51080  2.294700  -1.19521 -4.538200  0.256068 -7.48108 11.47800
## 3 4.385860 17.39720  2.733380  -9.22178 -6.485740  4.242090 -4.12368 15.03180
## 4 0.401121 -5.34980  5.408590   1.90772  0.521117  0.751213 -2.01571 -6.56855
## 5 4.365140 -4.09776  6.179930 -22.42640  1.867930  1.421670 -6.04500 11.81370
## 6 6.832900  4.53687 -0.433561  -6.12641 -8.780930 -1.704770 -1.79774  1.42995
##         PC38     PC39       PC40      PC41       PC42     PC43      PC44
## 1   4.437370 14.26960  -0.773884 -0.763226  -0.384879  4.53096 14.802200
## 2   0.792022 10.56230   3.527150 -4.092060   6.732950  3.39944  6.985610
## 3  -4.418860  7.86127   1.617910 -9.194360   8.817040 -1.81030  1.179980
## 4 -12.985200  9.25693   0.816089 -2.778960  10.981600 -7.97712  0.613751
## 5  -2.263540  2.18789  -1.435380  3.200060  -4.626190 10.26960  9.016560
## 6  15.989700  3.46830 -12.421200  2.007890 -20.046400 -1.68250 15.598900
##       PC45      PC46      PC47       PC48      PC49      PC50     PC51
## 1  8.95944 13.522100 22.170400  9.4360900 -2.945040  -4.63524 15.27060
## 2  7.10364  6.176390 17.104800  1.1119500 -1.499020  -4.81574  1.96486
## 3  3.03013  0.344237  0.809553  0.0702351  0.986384  -8.20198  2.71953
## 4  5.64426 10.530100 -0.936301  0.5337370 -3.987930 -10.01840 -2.18238
## 5 -8.49261  9.256920  9.657540  1.1076700 -0.835895   2.51008  6.75000
## 6 -9.02604  9.009420  6.946320 -8.9872100  5.886990  -1.55644 -6.45180
##        PC52      PC53     PC54       PC55      PC56      PC57       PC58
## 1  0.491098  -4.79928  1.73700 -10.735800 12.205200 -21.28840  -9.170540
## 2 -0.913348  -2.60682  6.27043  -7.785170  5.292690 -10.72860  -9.560770
## 3 -0.709451 -11.31390 12.27440  -1.729650 12.321900  -9.22542 -11.273500
## 4  2.514070  -1.36192 -6.23275  -4.475910 -0.171674  -2.56913   0.962375
## 5  1.430250   1.47281 -0.65328  -5.467850  2.789630 -10.37030   0.178506
## 6 -1.936830  11.55600 -2.18732  -0.172528 -7.732820  -4.36260   6.071840
##         PC59      PC60      PC61      PC62       PC63      PC64      PC65
## 1   0.558107 -14.42350 -2.720380  2.669140  -9.918800  0.468175  -9.58663
## 2  -4.820260  -6.67264 -9.335350 -0.198429 -10.951600 -0.773179  -2.79265
## 3  -2.944490  -9.15708  0.578892  4.932590 -15.587500 -6.857900  -9.70347
## 4  14.683600  10.29440 -4.486420 -0.741527   7.001920  8.943210 -11.48830
## 5   1.023870  -3.04103  8.526610  2.262430  -7.140300 -2.183480  -5.95505
## 6 -12.300800   4.77015 -4.122340  3.441300  -0.637977 -2.148020  50.38250
##       PC66      PC67      PC68     PC69      PC70      PC71       PC72
## 1 -2.58420 -0.707528   3.20538  1.28916  -8.62617   4.75803   0.982763
## 2 -4.68237 -7.628080  10.04620 -2.02789  -4.81989  12.59320   1.944160
## 3 -4.18801  3.127010  11.53110 -1.50220   1.88179   5.00590   0.751111
## 4 -1.46146  4.356940   7.68418 -3.44514 -12.42100  16.30550   7.590790
## 5 -9.59028 -8.745600  -6.94160 -5.41227   1.03817   5.74930  -4.315440
## 6  4.37345 -1.813220 -19.65520 20.37500   8.47092 -13.32410 -36.429000
##         PC73      PC74      PC75      PC76      PC77      PC78      PC79
## 1  -2.218060 -15.72030  2.642970 -3.800860 -7.739280 -3.681390 -0.233420
## 2  -7.196370 -18.20340  7.807600 -6.785510 -4.169990 -2.282600 -2.871490
## 3 -10.343400 -18.68270  4.092350 -3.039770  4.008260 -9.653350 -0.661432
## 4  -1.635540  -2.51278  0.353208  3.140340 -6.819570 -6.359990  5.004960
## 5   0.244696  -8.40124  0.959723 -0.287991  0.817661  0.987632  0.917362
## 6   6.689240   8.66856 -4.062010 -4.625740 24.070400 27.470100 -5.363180
##        PC80       PC81      PC82     PC83       PC84      PC85      PC86
## 1 -8.457150   4.238230  0.284226 -7.43818   3.389350  3.588980 -2.165900
## 2 -1.705140   6.921080 -9.030500 -6.39152   3.472570  6.366870 -0.645937
## 3  3.247060   0.761202  3.098860 -5.12717   2.775320 -0.441982  2.288010
## 4  0.287495   9.662640 -0.948469 -6.76181   0.592148 13.483200  2.898440
## 5 -2.224410  10.164700  1.511980 -8.35391 -11.221100  1.500370  5.899000
## 6 13.818800 -22.609100 -4.698610 -8.70657   7.638730 -9.129480 -3.919290
##        PC87      PC88      PC89       PC90      PC91      PC92       PC93
## 1 -10.13950 -0.816244  2.637640  1.3776600  0.664003 -1.815120   1.028400
## 2 -20.45190 -2.119820  2.393490  1.8052400  7.129360 -4.744720  -0.763207
## 3 -14.74440  3.848620 -0.800062 -3.6824600 13.921400  6.337180  -2.308600
## 4  -7.58353 -2.590720  1.165250  0.1242860 13.625700 -0.817992   5.490910
## 5 -10.41860  3.268430  2.752260  0.0702075  2.144920  0.522905   7.698490
## 6  -1.16883  8.544870  2.945770  2.9031300  3.885100 10.645900 -12.801200
##        PC94      PC95       PC96        PC97       PC98      PC99      PC100
## 1 -1.313660  -3.84598   2.908900  -5.2236200 -0.6353610  -9.08645  0.0143651
## 2  4.477040  -8.25974  -5.574690  -6.7365000  3.3960500   1.89099  0.2139130
## 3  6.370910 -12.22940 -12.203400 -16.7572000  6.5597700  16.19800 -3.2267400
## 4 -0.565558   7.25605   0.919801  -0.0930397 -1.5488300  11.60540 -9.1673800
## 5 -3.914470  -3.02074  -2.898510  -2.4957400  0.0482717 -11.08500  0.2298290
## 6 -3.018100   2.50314  10.226600  -5.7286700 -6.5308400   7.15800 -7.6628900
##      PC101     PC102     PC103      PC104       PC105    PC106      PC107
## 1  1.07133   3.98173 -3.252210  -1.717680  -0.3923890  1.24816  -1.745100
## 2 -3.20132  -1.04195  2.741610  -9.328820   0.0417841 -4.15877  -6.262400
## 3 -6.14705  -7.25484 -3.150010  -0.826528  -8.1842200 -1.88390  -7.674850
## 4  3.11123  -2.33031  7.844560   0.250554   8.6788100 -0.19136 -11.530400
## 5  7.85554 -11.68920 -0.605669 -15.411900 -12.3652000 -1.02618   0.500824
## 6  4.89323   2.52768 11.499600   9.711950   1.5494400  6.71054   5.783070
##      PC108     PC109     PC110     PC111     PC112     PC113    PC114    PC115
## 1 -3.02524  7.668930  -9.20340 -2.680370 11.728700 -2.738550 -4.23346 -7.32950
## 2 -7.51654 -9.780450  -2.64763  2.322780  5.607590  1.928910  3.46397 -5.31802
## 3 -8.86584 -0.808406  -4.31540  2.530450  8.258080 -8.850950  3.81741 -9.67504
## 4 -1.69105  9.377090 -17.90270  0.103756 -7.836550 11.537600  4.05930  5.59087
## 5 -8.16365 -5.208530   3.11312 -7.513210 -0.893163 11.616800 -2.47028 -3.20549
## 6  2.57702 -5.396150  -1.46467  1.333950 21.906200 -0.656812  9.92918  6.62916
##       PC116     PC117    PC118    PC119      PC120    PC121      PC122
## 1   8.41180 11.058500  6.67989  1.88865  -0.432606  8.66429  -5.089880
## 2   0.59735 -2.957340  2.90396  6.49746  -5.256190 -4.13262   6.717650
## 3   7.16965 -6.832780  3.62204  8.14030 -11.674200 -6.20002   1.717490
## 4   2.13644  7.638760  6.57390 -2.40779   9.523130 -1.45848  -0.960588
## 5 -10.54530  2.178960  6.33640  1.27484   0.707920 10.08110 -16.120300
## 6   6.08900  0.943839 -5.06997 -1.30717   7.344950  9.33311  12.082600
##       PC123    PC124     PC125     PC126    PC127    PC128    PC129      PC130
## 1  11.67940  2.14637 -3.450520 -0.674869 -3.01949 3.052020 15.97350  -8.120310
## 2  -2.41782  9.74802  2.167960  3.126920 -4.26479 7.256710  4.17745  -1.201090
## 3  -1.37718  3.03760 -9.648980 -1.442530 -5.67998 7.961680  4.01674   0.536839
## 4  -9.92076 -1.12548 -5.045300  9.174390 21.63250 0.461474 -0.70567   1.585860
## 5 -11.70200  1.53643  0.405001 -0.100278 11.13440 7.322360  4.60827   7.355260
## 6   9.61344 12.68280  1.568950  5.626010  1.69849 2.825770 12.97740 -11.241600
##       PC131     PC132      PC133    PC134      PC135    PC136    PC137
## 1  10.77360  2.952130  8.7015300 -2.42737  -0.582769  3.10618  4.76369
## 2  -2.21396 -2.593830  4.0699800  1.83944  -6.421540  3.45549 -1.54275
## 3   3.26855 -0.112469 -0.0764857 -2.07947 -13.663100  6.12741 -6.32643
## 4 -11.72950  7.815740 -4.2015800  6.19167  -8.332150 13.88750  7.01573
## 5   6.90981  2.368750 -4.9287900  1.34625   3.943220 -7.33402 -3.76745
## 6   2.19118 -3.157010  4.5153700  2.51769   2.475630  1.62286 -4.65162
##       PC138    PC139     PC140    PC141      PC142    PC143      PC144
## 1   3.78628  2.43580 -3.898380 15.94210   0.271981  3.40412   4.385640
## 2  -1.96172 10.27880  1.724300  2.74736 -11.145200 -8.40939  -0.408985
## 3 -11.85200 13.43260  0.191675 -6.99586 -10.731600 -3.43938   8.032100
## 4   7.61277  4.17395  5.126150 -6.57744   4.364710 -1.70990 -11.446700
## 5   3.41059 -6.55824 -9.269600  5.37878   7.505510 16.98780   1.765800
## 6   9.52479  2.93301 -0.312517  5.30163   8.506600 -7.73302  -0.153048
##       PC145     PC146     PC147      PC148     PC149       PC150    PC151
## 1   3.39213 -1.026560   7.01833 -1.0968900  -8.80704  -0.0733263 -2.46216
## 2  -6.72700 -3.507730  11.98460 -7.2196100  -2.80295   0.6421910 -6.50993
## 3   1.57037  6.979750  16.47200 -2.6465900  -3.19842  -4.6261600  0.47703
## 4  -6.37204  2.036010 -11.67280  0.0580746 -10.16150 -10.7923000  3.38652
## 5 -10.21940 -0.431477  10.71800 -3.9486700   3.81882   7.7359800 -6.13244
## 6  -7.02991  1.704610  -4.70034 -0.4173480   8.43710  -8.6166900  8.41247
##        PC152      PC153     PC154     PC155    PC156     PC157     PC158
## 1 -1.3818500 -0.0710997  7.569430  5.360200 -2.02716  3.710420  -6.03236
## 2 -6.8053600  8.8637900  0.218977  4.339880 -1.73716 -0.195435  -1.00440
## 3 -5.8156800 -3.2029200  5.156680  6.502330  4.85750  3.231060   3.45481
## 4 -0.0339514 -0.3514830 -8.126020 -0.765441 -9.24012 -0.911206   6.01267
## 5 -0.6752500 -0.3826510  2.898030 -6.756070  5.65613 10.711500  -1.46959
## 6 -3.4409200 -7.8875400 -1.906630 11.560300  8.24444  1.703990 -15.19740
##      PC159     PC160    PC161     PC162     PC163     PC164    PC165      PC166
## 1  8.57231  4.286060 -6.22192  6.412460 -3.436300  3.124610 -2.38407 -0.0743992
## 2  6.85697 -2.107580 -5.03154  1.970930  0.515066  5.202650 -5.59314  0.9786730
## 3 -1.84488 -0.618533 -4.09814  0.739014  5.391380  6.302800 10.75090  2.6092000
## 4 -6.31869 -1.206350  4.71626  3.385330  3.454830  3.780330 -2.61666 -9.5811300
## 5 -4.75707  9.420320 -7.47857 -6.745560  1.925900 -6.273810  4.48416 -7.5999400
## 6 -3.95660  0.290584  3.61267 -2.875150 -4.373350  0.846221 -2.02132  3.3961300
##      PC167    PC168     PC169    PC170      PC171    PC172    PC173     PC174
## 1 -3.37632 -8.65957  8.410160 -1.28420  -4.957490 -9.86513 -7.99086 -0.902778
## 2 -1.62580 -6.29061  4.311840 -9.20611  -0.261691 -6.60737 -6.71567 -1.582530
## 3 -1.99270 -1.97154 -5.766560 -7.14754   9.291180 -1.47950  2.87915 -8.384660
## 4 -9.68854 -4.86442 -3.395170 -4.30684  -5.989000 -6.77772  1.53557 -2.498750
## 5 -5.66323  4.53276  0.309445  2.92760 -10.052000  2.31203 12.53380  1.830930
## 6  2.03946 -4.51190  4.294950  7.86477  -0.462650  2.75597  3.93608  1.126650
##       PC175     PC176     PC177    PC178     PC179    PC180    PC181     PC182
## 1  -3.76400 -0.535447 -11.07330  1.76248  0.624051 -2.89583 -2.42015  2.514120
## 2  -7.16571 -7.016380   2.08440  6.67386 -5.112840  1.51242 -1.64664  3.097170
## 3  -3.61787  5.369630   5.24604 -3.06494  3.809300 -1.91191  3.39554  9.510150
## 4  -9.15582  5.580740 -11.41580  6.42779  5.493620  3.79652 -9.30633 -4.461860
## 5  -2.85959  2.125850   4.97769  7.12612 -2.058650  7.41671  7.69743 -0.983422
## 6 -15.13650  2.047540  -2.34772  1.04986 -2.748390 -5.47255  1.12465  4.138490
##       PC183     PC184     PC185     PC186       PC187    PC188     PC189
## 1 -5.995520 -11.90190  0.866295 -7.605300   0.7782150 -8.63817   7.25269
## 2 -1.345180  -2.32974 -9.487760 -0.410367  -6.7688300  6.15010 -10.13820
## 3 -0.146179   6.86151 -4.465230 -2.973670  -0.0564168 15.59260  -8.86422
## 4 -8.010760  -2.18257 10.499300  2.970940   2.1763600  2.85204   1.51159
## 5 -7.857970 -10.62900 20.013000  3.828900 -17.6671000  5.95325   9.98656
## 6  1.313790  -5.94180  1.723900  2.865960   0.2075390 -4.55138  -3.59848
##      PC190     PC191      PC192     PC193     PC194    PC195     PC196    PC197
## 1  1.63709 -5.045830 -3.2185700  0.417710  -3.36301  4.92474 10.121400 -5.99975
## 2  2.72385 -2.836020  2.1050300 -2.721190  -5.30938 -3.41458  6.458500 -1.13680
## 3 -2.87082  2.858950  7.6057100 -0.772679   6.37692  1.80985  2.838930  3.87239
## 4 18.64120 -0.809942 -1.6737800  3.432980   4.89215 -2.16179 -3.820910  8.56546
## 5  9.40218 13.214300  6.3401900  0.982574 -13.26430 -7.43342  2.377820 -3.68807
## 6  1.79455  5.003800 -0.0460385 -0.700963   1.55132 -2.21679 -0.427266 -5.59028
##       PC198     PC199    PC200    PC201    PC202     PC203    PC204    PC205
## 1 -8.329010  3.084970  6.06393  1.58537  3.45384  6.476630  4.77097  4.91583
## 2  2.413100 -4.290930  1.50770 -8.43390 -3.40258  1.262370  1.21918  5.12172
## 3  3.328860  0.627403 -9.22015  2.60473 -4.28864  0.765437 -8.21244 -2.89425
## 4 -0.950113  0.750771 11.32810 -6.16751  1.05991 -3.229290  6.46602 -1.44482
## 5  3.329510 -8.639140  9.98636 -5.50307  9.32961  1.189780 -6.69372  1.88490
## 6  1.358300  1.322150  2.11656 -4.30790  4.25234  1.863890 -3.49724 -1.83016
##       PC206    PC207       PC208     PC209     PC210     PC211     PC212
## 1  -3.48199 -5.29132  -0.0542595  2.504250  -1.55845   2.46892  5.970370
## 2  -6.61383 -6.74388  -4.1538400 -1.301270   2.29149   6.68704  4.780920
## 3   3.86647 -6.52522  -9.4877500 -2.383810  -1.46861   3.97522 -0.705368
## 4  -2.74389 -1.18694   5.0295400 -3.094740  -9.47931 -15.12050 -2.059670
## 5 -10.29620  2.54475 -16.3906000 -9.532990 -10.69040  -1.68951 -2.927220
## 6  -6.07409  4.54792   0.4110090  0.404948  -3.72848  -3.42834 -5.187530
##      PC213     PC214     PC215      PC216      PC217    PC218      PC219
## 1 -2.42070   6.48670 -3.973550 -4.7721600   4.660840 -2.60235 -14.653100
## 2 -4.30141  -2.98673 -8.709530  2.0847900 -12.326900 -1.48172 -13.695600
## 3  9.00043   3.57021 -2.005140 -1.7206000   5.750570  2.90068   0.209276
## 4 -2.48908  -8.69769 -4.212810  6.6498400 -11.340100  6.09299   1.147360
## 5  1.05449 -14.73440  8.896820 -2.1816200   0.569268  3.43031  -7.049560
## 6 -6.58644   2.32351  0.870668 -0.0806979  -1.284990  3.12712   4.691680
##      PC220    PC221     PC222     PC223    PC224     PC225     PC226    PC227
## 1  5.32424 -1.81868  3.822470 -0.275304 2.907750  3.842280 -8.259580 -3.42201
## 2 -2.74035  9.94432  0.453123  4.848000 3.556840  1.961970 -4.038620 -4.46039
## 3  4.13684  4.42842 -0.313577  3.765110 4.595700  4.061720  0.162896 10.20030
## 4 -7.18844 -8.60654 -1.823810 -2.210670 0.117209  3.811730 -0.562794 -5.90821
## 5  5.25829  2.68741 -2.751040 10.365400 1.267740  0.663535 -2.298980 -2.41475
## 6  1.30701  3.64990  6.313110 -0.895773 0.706628 -2.402200  2.331180 -3.94659
##      PC228    PC229     PC230     PC231     PC232     PC233     PC234     PC235
## 1 -5.75789  2.82847  -6.05264  3.202620  2.764870  3.180620  0.844975 -1.732440
## 2  2.37753 -1.27006  -6.63598 -0.862593 -4.765190  2.720740 -3.293410  2.199580
## 3 -2.42518 -1.59755   3.92775  5.014570  0.416147 -0.379688 -2.750140 -0.428346
## 4  2.98621  1.76162 -12.24470  0.691565 -4.574500  4.282660  6.595130 10.849900
## 5 -1.38333 -1.52500  -2.46723 -1.924180  4.370950  5.731540 -7.233430 -1.737960
## 6  1.18406 -1.30812   2.80719 -1.418090 -1.958860  5.774980 -1.396080  3.763940
##      PC236      PC237     PC238     PC239      PC240     PC241     PC242
## 1  1.50830  0.0188626 -3.241340 -2.088240  -0.145485  0.442058 -3.695300
## 2 -1.39437 -4.1411800  1.810060  0.452486   3.708480 -1.143840 -1.412840
## 3  2.13987 -1.3277700  0.214955 -2.610540   0.547971 -1.712250  2.423510
## 4  4.37107 -1.8006500  4.993960  4.129340   4.174540  0.943437 -0.612052
## 5  3.19083  4.6464400 -0.664133  1.854380 -10.472100  0.237792  6.324880
## 6 -2.99110  1.9328700  6.188620 -4.508480   7.227320 -5.453790 -3.312970
##       PC243      PC244     PC245     PC246    PC247     PC248     PC249
## 1 -0.633572 -10.757000 10.408100 -2.937290  5.34183 -0.889481  9.070440
## 2  3.575110   0.545916  7.352760 -0.644789  4.83957  1.543980 -1.261820
## 3 -1.275890  -3.528040 -0.899813 -1.176770 -1.89888 -2.911280 -4.947130
## 4  1.588890   1.716130 -6.014590 -7.631690  2.20297  4.488490  0.172609
## 5  3.029960   1.477930 -1.065330 -0.907359 -7.25785 -1.496780 -6.771940
## 6  0.850354  -3.366880 -2.951220 -6.438890  2.56175 -1.645320  1.155130
##       PC250     PC251    PC252     PC253     PC254     PC255    PC256
## 1  5.645780  2.775420  5.36163  3.207840 -5.556680  1.906810  3.29164
## 2  3.980440 -0.792701  3.09064 -1.012240  6.586170  7.407260 -4.00832
## 3 -4.385010  2.395610  3.62373  0.312608 -0.915628 -6.904650 -3.04102
## 4  2.945810 -2.617730 -3.31902  6.399660 -3.338150  8.499030  3.04215
## 5 -5.018150 -6.278210 -1.69965 -4.143880  4.157770  5.141980  5.47467
## 6  0.286806 -1.717780 -7.71334  1.654840  5.777410  0.100914  3.99665
##        PC257     PC258     PC259     PC260      PC261      PC262     PC263
## 1   1.908170 -0.346729 -2.068380 -0.454116  2.8275300 -4.2795000   2.68967
## 2  -2.836050 -5.521990  2.869430  1.831010 -0.8361630  0.6834880   4.36687
## 3   3.341050  1.700230 -1.203630  8.805090 -1.1615000 -0.0520601 -11.08250
## 4  -0.223479 -4.666480 -1.459280 -3.822090  0.0320726  6.3173300   6.13062
## 5 -13.373000  2.784190 -0.888151  3.354070  2.4945800  1.0092500   6.92787
## 6  -1.723720 -5.078500 -0.808831  1.395500 -2.6842600  0.9101180   2.47404
##       PC264     PC265     PC266    PC267     PC268     PC269     PC270
## 1 -3.896970 -7.424880  -2.61903 -1.80304 -1.432350  0.302514  4.886840
## 2  8.623130  9.048980 -12.18990  2.79658 -2.591140  5.860040  2.154690
## 3 -2.877490 -3.986740   3.98942 -1.88482 -0.159315  0.690177  2.619790
## 4 -4.059760  9.016520  -7.47800  3.57368 -9.125700 -3.264310  8.668520
## 5 -0.308475 -8.934990   5.28209  7.24398  0.887045 -3.539820 -1.098510
## 6 -0.105430  0.872292   4.34957 -3.08185  2.435040  1.939330  0.502166
##       PC271     PC272      PC273    PC274      PC275    PC276    PC277
## 1  7.881900  6.471360 -0.2928420 -2.40316  1.5879000  7.22920 -5.93816
## 2 -0.698949  1.328250 -4.8536200 -6.92171 -2.1934300 -3.14084 -4.78602
## 3 -0.594129 -7.201850 -5.2439200  2.54604 -0.0110208 -6.46526  1.95366
## 4  0.632097  2.090430  4.6021000  4.15350 -2.5705000 -1.12302  1.11706
## 5 -1.950900  2.576340  5.5259800 -4.47285  2.6125900  3.35120  4.27327
## 6 -5.470910 -0.504452 -0.0128675 -2.92769 -3.1319900  3.44109 -2.47637
##       PC278    PC279     PC280    PC281     PC282      PC283     PC284
## 1  4.099100 -7.48571 -0.167417 -4.87263  3.045790  4.1147900 -9.146650
## 2 -7.241290 -1.50634  5.497440 -1.74617  0.592121  1.3182200  0.345064
## 3 -2.509380  5.66482 10.094600  3.69168  8.531050 -1.6317600  2.625820
## 4 -9.687600 -6.44805 -8.565540 -7.51890  1.679170  9.6135700 -0.327262
## 5  0.582061  2.59476  3.509210 -5.19840 -5.738880  0.0458189  1.256320
## 6  0.557862  2.20280  1.134320  0.56882 -0.633496 -3.2924300  2.158030
##       PC285     PC286     PC287     PC288     PC289     PC290     PC291
## 1  0.454188  4.174470  0.237223  0.863101 -1.555120  5.878520  5.212820
## 2  4.232350  3.455010  4.458980 -4.171710 -1.518360 -0.932421 -4.366360
## 3  0.999888 -4.805370  7.659070  6.602280  3.226320  3.734250 -1.617640
## 4  0.308426  4.786080  4.799290  3.504770  0.738245 -0.682937  4.971720
## 5 15.276800  1.583850 -1.758480 -7.976630  5.585800 -4.134900 -6.646940
## 6  4.556510  0.369715 -1.702920 -2.950710 -2.221540  7.283740 -0.979947
##        PC292     PC293     PC294     PC295      PC296     PC297     PC298
## 1  3.2347100 -1.511000   8.81482  3.072580  5.3515800 -0.379514 -5.616130
## 2  0.0841457  1.997390   1.07355  2.846150 -1.1478300 -6.357120 -4.880070
## 3 -1.2987100  2.867530 -15.14560  4.212820  0.0752962 -1.180580  3.794550
## 4  0.5199970  3.782840  12.99830 -0.107392 -2.0496400 -3.060520 -7.168100
## 5  4.6260600  8.120040   5.14818 -5.493950  4.1595300  3.369220  6.329190
## 6 -1.6966700  0.487841   1.53241 -1.820870 -2.1325100  4.722500  0.398797
##       PC299    PC300     PC301     PC302    PC303     PC304     PC305    PC306
## 1  1.861230  3.74775 -2.460380 -2.775260  1.41265 -2.971920  2.068410  1.36384
## 2  6.735010 -8.45645  1.775710 -3.849100  3.05681 -9.581560 -5.319420 -8.04805
## 3 -0.694424  1.22018 -7.041780  2.304910 -1.24076 -0.331240  2.131310 -5.59945
## 4  2.861800 -8.04751  0.431195  0.353142  2.29742 -2.193920  6.040010  8.14027
## 5 -2.040470  1.27581  5.150070  0.908976  6.11362  0.956813  4.235160  2.17671
## 6 -1.420090 -3.30534  4.593290 -4.022630 -1.87184 -2.147600 -0.095247  5.30675
##       PC307       PC308     PC309     PC310    PC311     PC312    PC313
## 1  2.666430 -8.91234000  6.413650   4.85648 -1.01690 -12.27880 10.88790
## 2  0.779983 -0.26894600 -2.635360   1.02629  6.27035   1.44025 -1.29713
## 3 -2.735310  2.14209000 -0.543058 -10.04880 -3.71257  -6.62850 -8.11757
## 4  2.625690 -0.00487239  3.818260  -1.98443  3.03673  -7.61543 -1.92937
## 5 -2.550460  5.10258000  9.863550   5.33091 -2.23226   1.08442  7.03003
## 6  3.398430 -3.44865000 -1.505560   4.73401  5.69713   1.02146  1.17421
##      PC314    PC315    PC316    PC317    PC318     PC319    PC320    PC321
## 1  6.27147 -7.87005 -2.19640  3.75274 -2.35320 -0.154202 -4.74634  6.04989
## 2  1.98465 -2.75595  1.02746 -5.13690 -5.21263 -4.834630  2.48781  2.85248
## 3  6.46946  4.07161  5.10265  1.22482 -3.74938  2.692430  3.66822 -5.38208
## 4 -4.51037 -1.79956 -2.52217  4.26051  7.77052 -6.219070 -9.51949  6.37174
## 5  1.34634  2.01760 -1.35013 -1.03651 10.55990 -6.811470 -2.31631 -1.99656
## 6 -1.90534  1.38766  1.74802  3.24850 -3.13560  1.777040  1.93866 -4.47499
##      PC322     PC323     PC324     PC325     PC326    PC327     PC328    PC329
## 1 -9.10108  3.549700 -5.680010   4.48874  1.181740 -3.97591  1.257650  5.58334
## 2 -7.64026  0.230071  0.342849   7.77410 -4.722510 -2.35469 -0.361317 -2.08259
## 3  1.01563 -1.167460  0.789843 -10.34660 -0.977359  7.06295  0.830032 -1.26368
## 4  3.48818  3.681180  4.046320   9.38505  4.554350  0.47823  4.633000 -3.47130
## 5  2.99746 -3.148820 -2.146860  -8.20774 -0.570330 -4.22945  3.846020 -1.66290
## 6  3.75454 -1.264480  0.651504   3.85269  0.445843  1.18858 -4.492820  1.95275
##       PC330     PC331     PC332      PC333     PC334     PC335    PC336
## 1  4.725620  0.689088  3.824080  0.1544140 10.934100  1.960290  4.24537
## 2  4.529400  5.010020  0.113042 -2.6587500  1.395130  0.909263  4.34849
## 3  0.296213 -6.934480 -4.635410  0.0876124 -4.181550  5.729710 -8.11063
## 4  3.086830 -7.170510 -1.702540  0.3362310 -1.141530 -9.095320  8.75012
## 5 11.527500 -0.808551  3.888850 -0.8690900  5.919560  3.828840 -6.15706
## 6 -4.084990  4.078120  0.842066  1.1584300 -0.949974 -2.559420 -4.09158
##      PC337     PC338    PC339     PC340    PC341     PC342     PC343      PC344
## 1  6.19540  1.945560 -1.28862 -6.573360 -4.13726 -8.009540 -5.145880  -6.836020
## 2 -2.72169  3.171410  1.48908 11.547600 -6.78609 -0.150857 -0.738223  -3.725380
## 3 -8.39591  2.084090  1.21955 -5.032480 -2.83132 -0.963568 -4.870210   6.383300
## 4  0.75686 -9.706950 10.48390  0.494418  9.77365  3.238740  5.351960 -10.035500
## 5 12.14440  0.260172 -3.43320  4.804190  8.37955 -1.097120  0.981563   3.782490
## 6  3.22115  0.742774 -1.06034 -3.422170  1.74194 -0.811232 -1.507280   0.945183
##       PC345    PC346    PC347     PC348    PC349      PC350    PC351      PC352
## 1 15.642800  5.91846  7.50982  -2.67570 15.83610 -5.4801600  4.11919 -1.7759500
## 2  0.491086 -2.19816  2.25648 -12.88000 -1.21171  5.9196000  2.78346 -0.7777890
## 3  0.731040  5.25730 -4.80865   8.05605 -6.00373  2.1341400  5.14207  8.5583400
## 4  6.410900 -4.66821  3.41355  -1.43975 10.43480  3.3980600 -9.29563 -5.0634000
## 5 -0.942191  3.12428  2.47403   7.10411  5.81264 -0.4196320 -5.08933 -0.0112671
## 6 -1.250920  2.68590  2.13484  -2.36689  2.07554 -0.0453647 -1.24654  0.7466450
##       PC353      PC354     PC355     PC356    PC357    PC358      PC359
## 1 -6.271610 -3.6575200  -2.02014  5.899610  8.67169 11.37450  4.8255700
## 2  1.483500 -8.3701500   5.59213  2.834920 -5.95256 -1.84332  6.2469700
## 3 -0.886421  3.0260800   1.24496 -1.768100 -2.88226 -1.34889 -5.7470200
## 4  3.287070  0.0440818  -2.25638  2.248700 -3.60181  2.44536  9.8320600
## 5  0.541913 -9.3910500 -11.30110 -0.308222 10.47090 12.93630  5.2654400
## 6  1.377450  2.8188000   1.07941 -4.605620 -3.07809  1.60296  0.0516436
##      PC360    PC361    PC362     PC363    PC364    PC365      PC366     PC367
## 1  4.97528  1.92816  3.26170  -5.13914  8.99605  5.77021 -0.8794830   1.29226
## 2 15.77110 -4.78012  4.94689   1.43904  9.70702  5.01752  3.8053200 -10.43710
## 3  0.52344 -4.92525 -7.49090   7.74957 -2.71925  1.01535 -3.0264500   1.40407
## 4 -1.16671  3.95125  1.05050  -4.05431  3.17385  9.23509 -1.8440800  -1.28007
## 5  3.62068  1.77314  6.63181 -14.03620 -1.75186 -6.48401 -2.4602700   4.02054
## 6 -5.54325 -1.22803 -4.00229   2.64747 -3.82953 -3.51300 -0.0722511  -1.08497
##      PC368      PC369    PC370    PC371    PC372       PC373    PC374     PC375
## 1  5.05176  -2.280400 -1.28818 -1.98936  2.25610 -10.4267000 -8.52831  -4.14355
## 2 -9.02535   3.330870  4.90899 -3.33300 -2.10591   4.8554800 -1.97702   5.05386
## 3  8.59315   3.874120 -2.73169  4.81824 -2.00050   1.2452200  8.20590  -9.49484
## 4 -4.29816  -0.900339  5.13210  2.73667 -5.40409   1.0662300  5.28201  -2.18340
## 5 -2.78301 -10.314800 -1.20401  9.62003  7.94152   4.7686100  2.80937 -11.35100
## 6 -3.88405   1.491120  4.43727 -2.91086 -3.67505   0.0924878  4.06388  -3.60469
##      PC376     PC377    PC378    PC379    PC380     PC381       PC382    PC383
## 1  1.93287 -3.791200 -6.18594 -4.41144  7.48194  22.09360  4.31477000  5.48440
## 2 13.54250  9.281260 -9.10043 10.85510  9.88682   3.00223 -0.00964726  5.88384
## 3 -2.38920 -0.871791  3.70292 -1.97158 -7.64226   4.68540 -5.15182000  2.47221
## 4  6.10118 -4.475620  2.28105  4.29631  4.57648   1.05555 -1.32495000 11.20620
## 5  2.11811  3.496590  9.99898  3.49704 -6.31428 -12.32150  1.98178000 -3.65927
## 6 -2.60094 -3.825030 -3.28517  3.45940 -1.60285  -2.71120  1.30714000  4.17026
##      PC384    PC385     PC386    PC387     PC388    PC389     PC390     PC391
## 1 -5.28089 -5.80310 -10.29710 -2.42917   3.15763 2.783010 -3.184410  2.931380
## 2 10.68720  8.17930   2.16115  6.30984 -11.06560 5.941180 -2.447950  5.736400
## 3 -9.00153 -7.83689  -3.41300  5.76060  -2.89051 3.687680  6.391530 -5.684660
## 4  6.98265  5.46019   7.93793  3.34408  -3.52210 0.375499  5.429060 -0.179978
## 5 -2.23224 -1.23719   2.70152 -7.85225  15.14510 0.535925  2.373610 -6.279400
## 6  2.07067 -0.71867   2.62505  2.06858  -4.47926 3.610480  0.586538 -3.261380
##      PC392    PC393     PC394     PC395    PC396     PC397     PC398     PC399
## 1  1.62689 -1.12392 -5.635980   2.17522 -9.73348 -3.600210   6.35699 -0.612908
## 2  2.18451  9.80940 -4.209880   7.96108 -3.24413  0.656293  -1.18129 -2.038950
## 3  1.33877  2.12779  3.777860  -2.75396  2.24012 -6.166620   5.39365 -4.671400
## 4  1.81005 -1.17723 -1.805410  -9.10131  6.23361  2.355480  -2.08915  1.329180
## 5  1.70402 -1.28832 -0.523205 -15.77560 -1.26890 -9.754110 -10.30850 -0.843023
## 6 -1.26336  5.35840  0.470259  -2.92586  5.59011  5.356410   1.32668 -0.179859
##       PC400     PC401     PC402    PC403     PC404     PC405      PC406
## 1  -5.80481   4.66318  3.906940 -3.77582 10.973500 -13.75900  -9.040080
## 2 -13.22000   7.09513 12.945200 -6.56272  8.047690  -2.30640   4.000400
## 3  -4.02758   4.53056  2.418540  6.50730 -0.782111  -1.88766   1.221530
## 4  13.66800 -10.39270  1.209900  6.06472  5.519690  -2.19619 -10.185100
## 5   5.65307   2.39825 -0.964182  2.68066 -6.448430   6.36762   2.408300
## 6  -3.10339   0.56562 -0.322496  3.43305 -3.003290   1.60209   0.681346
##        PC407     PC408     PC409    PC410     PC411     PC412     PC413
## 1 -10.267300 -15.47170 -12.94810 14.20050  -6.03461 -6.497190 -7.971070
## 2  -5.511570  -1.86883   1.98164  0.48436   1.67525  7.565890  3.094270
## 3  10.794200  -4.99562  -4.91824 -1.54396  10.81960  3.548870  3.991310
## 4  -0.661768  11.45950  -4.36517 -7.25156  -5.90374  3.113560  0.613933
## 5   4.552980  -1.58269   3.86753 11.88360 -14.28830  1.956870  9.093050
## 6   1.607460   3.66859   3.26148 -3.45277   4.53780  0.879847 -0.969275
##       PC414     PC415     PC416     PC417     PC418     PC419     PC420
## 1  6.745260  7.786240  2.539430  7.386510  0.796349 -8.530330   1.31143
## 2 -0.573863  1.438410 10.799800 -4.000170  2.457460 -2.701190 -14.11400
## 3 -4.907460 -0.238353 10.316300  1.994040 -4.105150 -5.300350   3.60273
## 4  1.147570  5.171590 -2.661040 -1.330040 -4.218330  4.052830  -2.29086
## 5  0.644487  2.719560 -2.868220 -8.205280  8.984360  9.502400  11.44930
## 6  0.498546 -0.795544  0.731469  0.284111 -0.973679  0.605435   3.13568
##       PC421    PC422     PC423     PC424     PC425     PC426    PC427     PC428
## 1  5.932930 10.28400   3.00428  3.491630 -1.241720  0.756074 -3.69073  4.940750
## 2  6.267310 -6.59150 -11.83540  1.877340  3.315520 -1.095360 -9.36767 -1.251710
## 3 -1.315130  1.71957   3.83480 -0.542492  1.522560  1.758520 -3.09330  1.202350
## 4 10.180800 -5.05624   2.72519  0.814774  3.640540 -0.199441 -5.78517 11.372400
## 5 -2.604610 -6.54816   7.74349 -5.643030 -6.290190  7.583280  1.61067 -4.844120
## 6  0.461433 -2.24463   2.94083  2.804820  0.806128  2.214190  1.94373 -0.398188
##      PC429     PC430     PC431    PC432     PC433    PC434     PC435
## 1  4.50463  9.871870  0.863604 -6.77771  0.367013  1.44034  1.084970
## 2 -7.48562 -7.558070 -7.248220  3.82590  3.491890 -0.77650 -9.518170
## 3 -9.53804  8.274840  5.279000 -2.58229 -2.891920 -1.22442  8.871370
## 4 -2.46774  7.618920  3.777050  6.55248 -7.665490  8.40682 -0.231761
## 5 -3.63503 -1.699960 -7.177160  8.83438 -4.696010  7.11644  0.771812
## 6 -4.81922 -0.120496 -0.321027 -1.81872  1.108510 -0.46094  0.710387
##         PC436    PC437      PC438      PC439     PC440     PC441    PC442
## 1  1.88670000  7.91132   0.673881  6.1244400 -9.399960 -5.818550 11.57570
## 2  0.00992825 -2.80371  -4.358480 -6.3011400  7.145170  6.002420 -6.78048
## 3 -6.09916000 -7.93747   2.272930  0.0476828  0.137623 -0.773906  1.89659
## 4 -1.07342000 11.85260   7.378460 -4.0792400  0.583603 -8.627960  5.53160
## 5  0.97579100 -1.60118 -11.032900  4.8823600  3.963900 -0.874974  8.81791
## 6  1.83387000 -3.83216  -2.395240  1.7101300  0.200175 -3.708030 -2.63624
##        PC443     PC444     PC445     PC446     PC447      PC448     PC449
## 1 -4.8587400   2.08689  4.766590  5.933090 -7.266390 -12.715800 -1.758400
## 2  1.3783900 -12.17750 -0.816036  0.797621 -1.594840  -0.147152  0.436357
## 3  6.5496200  -2.87627 -3.995170  4.005200 -2.612150  -2.000990  6.135440
## 4 -8.6033800   8.32401  1.399960 -6.030020 -9.848930   8.411860  0.306252
## 5  2.3463600   6.74193  7.527560 -3.834920  0.196744  -7.705480 -5.067370
## 6  0.0120264   1.95481  2.120640 -0.525637 -3.309470   0.620707  0.632064
##        PC450      PC451    PC452    PC453      PC454    PC455      PC456
## 1 -11.689200  -4.605370  9.33549 -7.24484 -4.4733200  1.07824  8.2099200
## 2 -10.858200 -11.098400  6.48050 -7.65478  5.4590500 -3.44983  7.1966500
## 3  -3.026670  -2.304850  2.88004 -7.15739 -4.8599300 -2.40202 -3.7875900
## 4   0.819696   0.791209 -2.08938  1.07110 -2.8286500 -6.30264  0.4335760
## 5  13.739400   5.115580 -2.93432 -3.58422 -4.0064800  4.25010  1.1255600
## 6   0.293074  -1.371050  5.19409 -3.59776  0.0670635  1.01360  0.0164702
##      PC457     PC458     PC459      PC460    PC461    PC462    PC463    PC464
## 1  9.60658  5.973530   7.29880  10.782200 -5.00370 -1.42603  5.32504 -1.93291
## 2 -4.23453 -1.238000   4.32960 -10.681800 -2.41898  5.43427  2.42697  9.20443
## 3  1.93164 -0.403249  -1.60155  -9.853220  8.67881 -3.66926  2.95450  6.17350
## 4  1.91621 -5.543340 -11.11750   0.815227 -4.21960  2.59885 -2.49242  8.08769
## 5 -5.60659 -4.465390  -2.15803  -0.138856 -7.58051  1.66757  3.00453 -4.46139
## 6 -1.81341  1.627030   3.22371  -2.616790 -2.89281  2.25168  1.46636  6.23832
##        PC465     PC466     PC467    PC468     PC469     PC470     PC471
## 1 -0.8595550 -8.121010  4.350920 -8.19652  -6.18350   4.31932   2.25756
## 2 -1.6582400 -0.759506 -1.826000  1.28070   4.85208 -14.27040  -3.75016
## 3 -3.1546500  3.949060  0.833705  8.84442 -11.63850   3.87160   8.93423
## 4  4.5676100  2.243230  2.695440  4.38762  -2.73565  10.22340   9.48760
## 5  6.4021700  7.530270 -4.207430  5.05809   3.74303   5.56313 -11.32940
## 6  0.0534804 -2.098820  0.879289  7.01229  -1.25283   1.59843   2.22859
##        PC472       PC473     PC474       PC475       PC476     PC477    PC478
## 1   7.251250 -10.5307000 -13.16660  7.57463000   0.8817980  1.615600  3.56190
## 2  -6.001540   5.1884200  -1.68037 -0.00133996 -11.3358000 -0.496895 -5.49822
## 3   0.875364  -7.8378000  -5.09272  3.23684000   1.9439400 -1.600750  7.20952
## 4  -2.734310   1.1012500   1.62950  1.95918000  -0.0215181 -6.678140 11.79640
## 5 -10.640100  10.7421000   8.44604  5.37840000  -7.9373300 -5.275570 -6.24019
## 6  -4.977770  -0.0474631   2.18063  2.81963000   0.4887730 -1.164540 -1.01950
##       PC479     PC480     PC481    PC482     PC483     PC484    PC485
## 1 -2.480880 -3.143040  4.275180  7.38254 -8.756690 -6.248620 12.62860
## 2  2.959950  7.075270 10.358600 -1.92343  9.125270 -3.130580  1.12998
## 3  0.737442  4.028600 -2.381780 -2.44927  2.394750  6.570870  1.18524
## 4  7.204280 -9.550180 -1.384020  2.98877 -7.534780 12.913600  2.40848
## 5  2.790070  1.301020 -9.736830 -9.14713  1.611010  1.028350  3.16043
## 6 -0.139831  0.870771  0.122987  2.90008 -0.932203  0.602586 -2.58757
##         PC486     PC487     PC488      PC489     PC490    PC491    PC492
## 1   8.3506700 -4.273670 -2.285900  14.299500 -0.645002 1.525670 10.16080
## 2 -13.7747000  5.280480 -0.302640 -13.785400  0.930290 0.357322  1.94235
## 3  -2.2216700 -1.484910 -0.478184  -0.936645 -2.457360 1.994800 -2.99770
## 4   0.0860032 -2.037850 -1.847600  -6.067140  8.656050 3.329810 -7.81211
## 5  -1.7489000 10.983400  1.928100   0.709953 -6.431470 1.777930  8.80344
## 6  -1.6356200  0.587272 -1.084890  -2.117870 -2.745180 2.366730  1.15418
##      PC493     PC494     PC495     PC496     PC497    PC498     PC499     PC500
## 1  7.37287  0.176354  4.677560   1.84540 -3.119430 -4.82422 -4.018130 -2.041050
## 2 -5.93754  1.039890  3.069200   3.23511  6.167540 -2.54592 -7.767320  2.111700
## 3 -0.45086  2.726000  8.473720 -11.14640 -9.234550  6.74055 -1.121020 -8.940670
## 4 -3.17611 -3.790700  0.453662  -9.07701 -8.398090 12.10130  0.889802  8.020890
## 5 -1.76968 -6.958450 -0.673026   2.90539  2.844770 -3.53862 -8.190720  7.723010
## 6 -1.16890  3.543110  2.554090   2.65964 -0.569137 -1.28228  2.266770 -0.276759
##        PC501    PC502      PC503    PC504     PC505      PC506    PC507
## 1 -11.466600  4.61609  1.5441200 -2.42249  0.508865 -10.256700 -6.23454
## 2   3.524180 -1.77061  1.4710500 -2.67078 -2.123460  18.583800  2.30523
## 3  -5.712750  4.13302  1.1619900 -7.85439 -3.684590  -1.116720  4.81702
## 4   3.111240  4.54086 -4.0525000  2.62071  2.244780  -7.115510  2.93446
## 5  -1.523200  6.64615 -0.0821228 -2.74096  5.005300  -2.443910  2.80938
## 6   0.116642 -1.79876  2.2429900  1.43504  0.820537  -0.538336  2.06717
##       PC508      PC509      PC510      PC511    PC512     PC513      PC514
## 1 -11.87630 -10.979200   6.691420   3.878870 -7.65737 -1.319220   6.557230
## 2  -3.44116   9.738840 -10.613500  -4.610280 13.61980  3.491740   0.255563
## 3   9.32026   8.795590   5.135210  -0.590614  5.46324 -0.372639  -1.104600
## 4  15.57590  -0.628516   9.957560 -14.486000 -1.74275  9.748290 -10.349800
## 5  -4.20695   1.157280  -5.326120   2.030270 -4.28111  2.153750  -7.172970
## 6   3.51556   2.089350   0.911302   0.985379 -1.40459  1.728870   4.395040
##      PC515     PC516    PC517    PC518    PC519     PC520     PC521    PC522
## 1 -5.64497  4.889560 -6.77573 -7.80096 -6.49570 -2.299980   6.26671 -1.31589
## 2 11.18980  0.434659 -7.05622  7.41160  6.23250  0.715959 -12.45000  5.41268
## 3  5.78702  3.711290  7.03477 -2.30366 -2.89309 -5.057240  -3.58941 -4.90184
## 4 -7.00253 -3.985060  5.39715  3.04016 -8.84464 -6.509750   2.45062  1.76711
## 5 12.66740 -0.865975  6.40084  4.60174 -1.25770  8.229480   2.34765 -5.37326
## 6  1.73510  0.490490  2.10920 -2.62063  1.46998 -0.332803   1.30543 -1.07806
##       PC523     PC524    PC525    PC526     PC527    PC528    PC529      PC530
## 1  2.945230  1.138730 -5.01213 -4.44029 10.943300 -1.93990 -5.86642   7.822330
## 2 -1.000660  0.874072  7.03955  6.47242 -9.324960  2.96740  5.53623 -17.398500
## 3  3.367260 -3.859840 11.02850  3.50521  1.829570  4.36137  5.49040   1.997790
## 4 -0.970277  2.748140 -9.13475  7.53063 -1.372760 -5.29689  7.10520   2.154380
## 5 -6.823530 -3.007080  7.92358 -9.34217  0.920635  9.77813 -3.83178  -2.551710
## 6 -3.014560 -1.846280 -2.01592  2.11683  1.404330  3.06232  1.12556   0.724992
##        PC531     PC532     PC533    PC534     PC535     PC536     PC537
## 1   2.323650  5.801780 -10.26250 -4.90426 -2.992010 -0.971161  2.581790
## 2   6.846980  3.137600   3.26747 12.85440  5.447260  3.922390 -4.988790
## 3 -12.811800 -3.664390   7.13303 -7.04664 -3.836180 -5.814210 -0.255316
## 4  -3.514190 -4.665410  11.52220 -8.08442 -0.761186 -1.351220 -0.832300
## 5   5.560800  3.848090   4.30847  1.41211  2.398800 -2.827290  5.605320
## 6  -0.771688 -0.464441  -2.79526 -1.20121 -2.890720  1.716190 -0.618047
##        PC538     PC539    PC540     PC541     PC542      PC543     PC544
## 1  -5.959560 -3.501270 -5.19679  0.794489  2.939730   0.300115 -0.372011
## 2   9.850100  1.798040  5.45823  2.245440  6.514940  10.342000  4.987210
## 3 -10.418700  1.877030  9.63294 -0.759066 -7.147520 -14.165300  1.656040
## 4   0.103646  4.092080  2.45996 -1.219830  5.036970  -1.929810 -0.558734
## 5   3.425080  0.461763  6.80273 -1.780450 -0.297549  -2.559580  4.184200
## 6  -2.669820  0.519892 -2.16603  0.222370 -0.555264   1.491080  1.899450
##        PC545    PC546     PC547      PC548     PC549     PC550     PC551
## 1   8.596730  1.90680 -2.768640   3.892500  4.516370  3.753670 -1.818410
## 2  -3.373460 -6.47414 -8.070960  -4.283470 -1.319790 -3.101690  0.845312
## 3 -15.233900  5.33849  7.718630   4.104330 -3.104630 -6.484370 -1.206920
## 4  -4.923260 11.12820  0.777560 -12.153200  1.842120 -3.279480  7.573670
## 5  -0.249055 -2.89930  3.862600  -2.626490  0.115531 -0.304657 -3.409650
## 6  -1.699340 -1.89654 -0.990828  -0.248587 -0.366713  1.449200 -0.740623
##       PC552      PC553     PC554    PC555    PC556     PC557      PC558
## 1  4.049500  -1.495450 -3.624230 -1.34729 -2.43588  1.804550   0.426419
## 2 -2.175330   7.641940 -1.313890  1.87972  2.78012 -3.094950   0.514300
## 3  5.954110 -14.187400  6.590610 -3.21690  1.15203  8.346890 -10.219600
## 4 -6.037380   0.958791  1.392710  5.49540  5.46041 -0.772404   8.587810
## 5  0.192326  -2.167610 -0.655948 -3.30742 -1.00708 -0.423641  -6.919280
## 6 -1.134870   1.823930  1.346370  1.15543 -1.46450 -0.383302   1.604070
##       PC559     PC560     PC561     PC562      PC563     PC564     PC565
## 1 -0.888456  0.257742  1.038540  2.808230  -5.764230 -4.787640  4.876670
## 2  3.561430 -4.110920 -5.611150 -7.440650  10.514700  0.633140 -8.441390
## 3 -2.358930 -2.656010  0.185138 -1.445850 -14.351100 17.148300 11.326400
## 4 -3.561890  3.618090 -1.657060  1.604290  -1.112830  1.703900 -7.477850
## 5  0.117779  1.662100  1.995320 -6.466400  -3.115950  0.748186  7.706960
## 6 -2.316310  1.767560  0.239714  0.876397  -0.618647  1.905450 -0.306901
##       PC566     PC567     PC568    PC569      PC570      PC571     PC572
## 1 -0.887465 -1.858450 -0.991821 -1.28782 -1.4095300  0.9321680  3.216610
## 2  4.588680  3.871030  1.648220  4.39023 -0.0919025  1.7332200 -5.565070
## 3 -2.986610 -8.071860  0.334433 -8.15559  5.5742400  0.0552569  0.404143
## 4 -2.704040  4.158040  4.521030 -2.94330  3.8994000 -1.2519600  1.594030
## 5 -0.855762 -1.864210 -3.602440  1.91566 -2.2370900 -0.7691340 -1.671210
## 6  2.520780 -0.519024  1.647360 -1.43841  2.0766900 -0.7250520  1.769540
##       PC573       PC574     PC575     PC576      PC577    PC578     PC579
## 1  1.496480 -1.60765000  1.835570 -0.311554  1.6758400  1.44635 -2.656710
## 2 -1.825640 -3.48712000 -4.018310 -1.928760  3.8782400 -2.35807 -3.427110
## 3  1.656520  9.01008000  5.986710  8.663310 -1.4363500  2.52865  6.829940
## 4  0.271233 -0.82334800  1.392990  2.936590 -6.7774400  3.61576  1.093880
## 5 -0.623263 -0.00298675 -0.689481 -1.116620  1.4269300 -1.53743  5.411900
## 6  0.337914 -3.25942000  0.141403  1.424630  0.0278279 -3.64204  0.287007
##        PC580     PC581     PC582     PC583    PC584    PC585    PC586     PC587
## 1  0.0364742 -0.673971  0.374328 -2.417630 -3.62892 -2.09375 -2.73483 -0.028330
## 2  2.7331800 -3.874590  1.700810 -0.382224  1.72387  4.34788 -2.56949 -1.864730
## 3  0.8755800  6.514110 -8.071340  2.443990  1.86322 -3.15398 11.21930 -2.462360
## 4 -1.3443800  2.392330 -1.364820  2.654820  2.92796  4.27132  2.94084  1.640400
## 5  2.0050500 -0.616753  2.817470 -1.002230  4.87625 -2.26725  4.54479 -0.719349
## 6  0.8808590 -0.149492 -0.638982  0.355468  1.05417 -1.38352  2.73011 -0.131356
##        PC588     PC589      PC590      PC591     PC592      PC593     PC594
## 1   1.869680  1.046760  3.3797800  0.5383800  0.843932  1.5864500 -0.317305
## 2   3.769710  1.677300 -2.3516500 -3.0843900  1.391900 -1.9263600  0.749139
## 3 -11.209900 -7.578970  2.0437200  6.1070900  1.570600 -3.5635800  4.383560
## 4  -6.151220 -3.194720 -0.4319590  3.1596800  1.248980 -3.8401600  0.225465
## 5  -2.285420 -0.290165  0.3885680  0.0386186  1.339790 -0.5385550  4.759910
## 6  -0.215492  0.478535 -0.0664508 -0.0911585 -0.515249  0.0851132  0.918039
##       PC595     PC596     PC597     PC598     PC599      PC600      PC601
## 1 -0.362545 -2.949700 -0.310311  1.668790 -2.523580 -1.5308200 -0.0377293
## 2  2.672630  1.273240 -1.482300  1.658700  2.208960 -0.5216440 -0.1209870
## 3 -2.354800  1.945270  5.952790 -4.633580  3.818910  3.0710100  0.7772900
## 4 -1.462290  2.769190 -1.792890 -2.236280 -2.578560 -0.5260140 -0.4195160
## 5 -2.123350 -2.003970  2.803530  0.577009  0.158948  2.5971500  0.0807232
## 6 -1.327320 -0.395127  1.411420 -0.992167  2.503670 -0.0913396 -0.1667980
##       PC602      PC603     PC604     PC605     PC606     PC607     PC608
## 1 -0.304739  2.0450900 -1.732020 -0.111057 -2.124030  2.152210 -1.234130
## 2  1.805010  0.4760950  2.756400  2.929340  0.613717 -1.069050 -2.413670
## 3 -1.961670 -3.8785100 -0.403171 -2.773050 -0.821987 -2.087580  3.312900
## 4 -1.197300 -2.5238100  0.364061  0.456234  3.298650 -1.751270  0.102052
## 5 -1.098050 -0.0999618  1.809800 -0.971798  0.701285  0.195923  0.206270
## 6  1.285450 -0.1154760  0.909756 -1.508790 -0.719903  0.878298 -1.775600
##        PC609     PC610     PC611     PC612    PC613     PC614     PC615
## 1  0.0955621  0.760985 -0.276028 -1.924340 -1.25493  1.147160  0.523616
## 2  2.2096100  1.552620  0.702700  1.197280  2.83933 -1.803140  0.294197
## 3 -1.7223200 -3.096790  1.704880 -1.673690  1.30840 -0.975726 -2.455110
## 4  0.4960720 -3.631510 -2.145910  0.706432  1.99772  1.129210 -1.991370
## 5 -1.3036800 -1.539820  1.114480  1.376400 -1.09089 -0.285640  1.223230
## 6  0.2844120 -1.458870 -0.345730  1.663580  1.16857  0.600104  0.706003
##        PC616     PC617     PC618     PC619      PC620    PC621     PC622
## 1  1.8332200  1.562370  1.612280 -1.175330  0.0216826 1.006140  0.929279
## 2 -0.9053970 -2.024050 -1.980180  1.601660 -0.0189202 1.478750 -1.094460
## 3 -0.8383630  1.191360  1.355010  0.787760  0.5707440 0.313855  2.261460
## 4 -0.8475500 -0.421698  1.163030  0.909591 -0.8847680 1.234950  0.501141
## 5  0.0416817  0.737694 -0.367072  0.422090  1.4115900 2.007890  0.573467
## 6  0.8153970  0.423953  0.711886 -0.503651 -1.3974000 0.108223  1.066580
##        PC623     PC624     PC625     PC626      PC627     PC628     PC629
## 1 -0.9999710 -0.228736  0.704697  1.544990  0.0104346  1.365800 -0.202931
## 2  0.8966440  0.772229 -1.372110  0.586026  1.3146800  0.465005 -1.468590
## 3 -0.0854745  1.276980 -1.224610 -0.936766  0.3474380 -0.498030  0.781507
## 4 -0.9593130 -1.268390  2.439610 -1.104860 -0.2633000 -1.856460  0.707273
## 5 -1.0007400 -0.563482 -0.245787  1.427570 -1.7325300  0.187427  1.367160
## 6  1.0558100 -2.575940  0.430193  0.311661  0.0586203  0.996500  0.153715
##       PC630       PC631      PC632     PC633     PC634      PC635     PC636
## 1  0.992440 -0.08316820  0.1058400 -2.081650  0.859041  0.2855410  2.134560
## 2 -1.141770  0.97480700  0.8667930  2.065520  0.871858  0.8486220 -0.593652
## 3  0.709320 -0.00153648 -0.9135030 -0.356353 -1.326770 -0.6266120 -1.961070
## 4  0.532257 -0.17556200  0.3786760  0.878349  1.323040 -0.2336970 -0.631268
## 5 -0.776238 -0.79250300 -0.0519582  1.281300 -2.137560  0.0859874  0.104304
## 6  0.369187  1.09454000  0.3900930 -0.711197 -1.871700  2.4187500  0.847491
##        PC637     PC638     PC639      PC640     PC641     PC642     PC643
## 1 -0.4754800  0.119019  1.084280 -1.0022400 -0.134867  0.237516  0.913197
## 2 -0.0387455  0.773562  0.075502 -0.0604612  1.178640 -1.056790 -0.876658
## 3 -0.5904150 -1.381190 -0.123490 -0.4848100 -0.799971 -0.118841 -0.716481
## 4  0.4127430  0.214456 -0.462740 -0.5274100 -2.357560 -2.198860 -0.220158
## 5 -0.3526710  0.757678 -0.148562  0.3279850  0.465572  0.841716  0.112466
## 6 -1.1454800  0.593687  2.358600 -1.3738800 -1.556790 -0.719843  2.627710
##        PC644      PC645      PC646     PC647      PC648      PC649      PC650
## 1 -0.0298159 -0.0301118  0.7362140  0.423934 -0.7878370 -0.0601018 -0.1994670
## 2 -0.4515030  0.4289420  1.2191200  1.215450  0.4249400  0.1166390 -0.3358370
## 3  0.3272280 -0.2396700 -0.0966545 -0.200439 -0.0777307 -0.3134910 -0.3891740
## 4 -1.0070500  0.4317410  0.3506900  1.975630  0.0405942 -1.4282100  0.0747299
## 5 -1.0477900  0.0991487  0.2981610 -0.493125 -0.2079020 -0.2323220 -1.1393900
## 6  1.1811300 -0.1388410  1.5783100 -1.528600 -0.8320670  0.0684478 -1.5233300
##        PC651     PC652      PC653     PC654     PC655     PC656     PC657
## 1  0.2841670 -0.345722 -0.3721110 -0.176386  0.353736  0.633902  0.486523
## 2  0.4595890  1.527450  1.3729600  0.588080  0.904454  0.113773  0.466722
## 3  0.8857030  0.627657  0.7609450  0.841837  0.349403 -0.138947 -0.989558
## 4 -0.1274200  0.738954 -0.1443750 -0.387653 -0.690265 -0.824392 -0.577596
## 5  0.0882755 -0.301358  0.0866904  0.863115 -0.581216 -1.302260  0.402338
## 6 -2.5241100  0.432479 -2.9113700  0.597232  1.346460  0.132135  3.120740
##       PC658      PC659      PC660     PC661     PC662      PC663      PC664
## 1 -1.412490  0.3808940 -0.5036700 -0.749166 -0.672211  0.1084770 -0.2851890
## 2  0.380635 -0.4648890 -0.2210650 -0.442385 -0.733839  0.4720470  0.6990380
## 3  0.868649  0.2134630 -0.6473910  0.524091  1.228340 -0.5587860 -0.0353689
## 4  0.761675  0.4236070 -0.1206910 -0.388974 -0.635118  0.0567572 -0.4692930
## 5  1.154090  0.0332251  0.0640611 -0.192103  0.126554 -0.5142230 -0.3111330
## 6 -2.914530  5.1726300  2.5903900  6.830200 -2.036000  4.4711400 -0.3952730
##         PC665      PC666      PC667      PC668      PC669      PC670
## 1 -0.00682654 -0.1172970   0.165462 -0.3085690  -0.977771 -0.1157990
## 2 -1.15614000 -0.5562380   0.646590  0.1819240   2.369690 -0.2985400
## 3  0.03809490  0.4095490   0.356780 -0.0517369   1.355550 -0.0613148
## 4  0.49917600  0.2976380  -0.696421 -0.1653140  -1.004470 -0.4682080
## 5 -0.64443300 -0.0728556   0.475144 -0.4606660   0.677836 -0.2047800
## 6  8.66924000  2.1869300 -18.597900 -0.7709110 -38.480100 12.5468000
##         PC671      PC672     PC673     PC674     PC675      PC676      PC677
## 1   0.1865840 -0.2750550  0.306220 -0.221516 -0.102784 -0.2207100 -0.1840940
## 2   0.5650000 -0.3958730  0.135673 -0.266523 -0.204872  0.0231147 -0.4386240
## 3   0.4147100 -0.0288715 -0.388242  0.111694 -1.274040  1.3153700  0.7371570
## 4   0.0534088  0.6366900  0.194803  1.190450 -0.457648  0.2328710 -0.0647057
## 5   0.1784740 -0.2428200 -0.389587 -1.159910  0.863640  0.1489970 -0.5326790
## 6 -11.7122000  2.9120900 -2.295190  1.574010  2.193640  1.2128000 -3.1024400
##        PC678      PC679        PC680     PC681      PC682     PC683     PC684
## 1  0.0640422 -0.2710100 -0.000236821 0.0546136 -0.1848390  0.301509 -0.108393
## 2 -0.3287060  0.0539233  0.394181000 0.0925843 -0.1991700  0.184817  0.223752
## 3 -0.7678080  0.0527573  0.059858600 0.5556500 -0.1576540 -0.341618 -0.128898
## 4  0.7049970  0.1382530  0.299252000 0.2478450  0.0147967 -0.313619  0.155991
## 5  0.3404060 -0.1041340 -0.729199000 0.3316830  0.4132190 -0.332750 -0.111544
## 6 11.3435000 -0.6586940  1.011680000 0.6987820  0.5028950 -0.502816 -0.538584
##       PC685      PC686      PC687       PC688 Individual          region
## 1  0.559724 -0.0521266 -0.2548120 1.60154e-06        801 Southern Europe
## 2 -0.242465  0.3617630 -0.0776254 1.60154e-06        802 Southern Europe
## 3 -0.321339 -0.2971330  0.3965070 1.60154e-06        803 Southern Europe
## 4 -0.165811  0.0432563 -0.0400723 1.60154e-06        804 Southern Europe
## 5  0.147862  0.0761560  0.0152784 1.60154e-06        805 Southern Europe
## 6 -0.490679 -0.5041310  0.0754175 1.60154e-06        806 Southern Europe
##   Pop_City Country Latitude Longitude Continent Year          Region
## 1   Durres Albania 41.29704  19.50373    Europe 2018 Southern Europe
## 2   Durres Albania 41.29704  19.50373    Europe 2018 Southern Europe
## 3   Durres Albania 41.29704  19.50373    Europe 2018 Southern Europe
## 4   Durres Albania 41.29704  19.50373    Europe 2018 Southern Europe
## 5   Durres Albania 41.29704  19.50373    Europe 2018 Southern Europe
## 6   Durres Albania 41.29704  19.50373    Europe 2018 Southern Europe
##     Subregion order
## 1 East Europe    25
## 2 East Europe    25
## 3 East Europe    25
## 4 East Europe    25
## 5 East Europe    25
## 6 East Europe    25

1.2 Create PCA plots

PCS 1 & 2

#save the pca plot
ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc2_r1.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 and PC3

#save the pca plot
ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc3_r1.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

1.3 Run LEA for SNP Set 2

(r2<0.1)

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:30,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)

If need to combine projects: https://www.rdocumentation.org/packages/LEA/versions/1.4.0/topics/snmf combine.snmfProject(file.snmfProject,toCombine.snmfProject) combine.snmfProject(r2_0.1.snmfProject,new_r2_0.1.snmfProject)

or use project=“continue” instead of “new” when adding remaining repetitions chrome-extension://efaidnbmnnnibpcajpcglclefindmkaj/https://www.bioconductor.org/packages/devel/bioc/manuals/LEA/man/LEA.pdf

project = load.snmfProject("euro_global/output/snps_sets/r2_0.1.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","lea_cross_entropy_euro_global_r1.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best for K=20

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      4      4      4
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         4      4      4      4      4      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      4      4      4      4      4      4      4      4
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         4      4      4      4      4
## without cross-entropy      0      0      0      0      0
## total                      4      4      4      4      4
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max  0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max  0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max  0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max  0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
##         K = 29    K = 30
## min  0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max  0.8047952 0.8044480
# get the cross-entropy of all runs for K = 20
ce = cross.entropy(project, K = 20)
ce #run  is best for k=20
##          K = 20
## run 1 0.8046743
## run 2 0.8048026
## run 3 0.8048396
## run 4 0.8049788

2. Plots for LEA SNP Set 2 (r2<0.1)

color_palette_20 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#77DD77",
    "#1E90FF",
    "#B22222",
    "#AE8333",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "#008080", 
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "green",
    "purple",
    "orangered"
  )

2.1. K=20 Plots

Default plot for K=20

# select the best run for chosen K 
best = which.min(cross.entropy(project, K = 20))
# best is run 1

barchart(project, K = 20, run = best,
        border = NA, space = 0,
        col = color_palette_20,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

#pops_inds<-read.delim("/vast/palmer/scratch/caccone/mkc54/microsats/pops_inds.txt")
#pops <- as.factor($pops)
#inds <- as.factor(pops_inds$inds)

2.1.1 Mean admixture by country for K=20

using ggplot

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 20, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:20)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_20)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k20_r1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

Using ggplot2 for individual admixtures

2.1.2 Extract ancestry coefficients for k=20

change to correct matrix

leak20 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K20/run1/r2_0.1_r1.20.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak20)
## # A tibble: 6 × 20
##      X1       X2      X3      X4      X5      X6      X7      X8      X9     X10
##   <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.467  5.65e-3 2.91e-2 2.08e-2 2.06e-2 2.30e-2 7.48e-2 7.89e-3 5.27e-2 3.98e-2
## 2 0.311  2.01e-2 8.34e-2 1.82e-2 2.75e-2 2.10e-2 1.22e-1 1.00e-4 4.56e-2 1.27e-2
## 3 0.990  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 0.845  9.99e-5 3.00e-4 9.99e-5 6.99e-3 9.99e-5 2.16e-2 6.00e-2 9.99e-5 1.42e-2
## 5 0.878  9.99e-5 9.99e-5 9.99e-5 1.74e-2 4.22e-3 1.23e-2 5.27e-2 2.50e-3 1.17e-2
## 6 0.998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 10 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak20 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak20)

head(leak20)
##    ind pop       X1          X2          X3          X4          X5          X6
## 1 1001 OKI 0.466583 5.64551e-03 2.91126e-02 2.07730e-02 2.06457e-02 2.29631e-02
## 2 1002 OKI 0.310824 2.01057e-02 8.33858e-02 1.82188e-02 2.74903e-02 2.10350e-02
## 3 1003 OKI 0.989852 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1004 OKI 0.844984 9.99280e-05 3.00413e-04 9.99280e-05 6.98909e-03 9.99280e-05
## 5 1005 OKI 0.878223 9.99262e-05 9.99262e-05 9.99262e-05 1.73929e-02 4.21508e-03
## 6 1006 OKI 0.998103 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##            X7          X8          X9         X10         X11         X12
## 1 7.47906e-02 7.89411e-03 5.27420e-02 3.98423e-02 8.68477e-03 1.01131e-02
## 2 1.22019e-01 9.99730e-05 4.56474e-02 1.26797e-02 2.23758e-02 9.99730e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 2.15622e-02 6.00394e-02 9.99280e-05 1.41907e-02 3.47921e-03 1.23783e-02
## 5 1.22851e-02 5.26514e-02 2.50057e-03 1.16512e-02 4.90798e-03 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##           X13         X14         X15         X16         X17         X18
## 1 9.37350e-02 4.03754e-02 1.32796e-02 2.91868e-02 1.85944e-03 5.56498e-02
## 2 1.60111e-01 5.60752e-02 2.10341e-02 8.74184e-03 1.79302e-02 4.49698e-02
## 3 9.98380e-05 9.98380e-05 8.35050e-03 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.11324e-02 1.59733e-03 8.88493e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 5 4.65299e-03 9.99262e-05 9.99262e-05 9.99262e-05 5.12692e-04 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##           X19         X20
## 1 1.78205e-03 4.34181e-03
## 2 7.05643e-03 9.99730e-05
## 3 9.98380e-05 9.98380e-05
## 4 1.36630e-02 9.99280e-05
## 5 1.01074e-02 9.99262e-05
## 6 9.98289e-05 9.98289e-05

Rename the columns

# Rename the columns starting from the third one
leak20 <- leak20 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak20)
##    ind pop       v1          v2          v3          v4          v5          v6
## 1 1001 OKI 0.466583 5.64551e-03 2.91126e-02 2.07730e-02 2.06457e-02 2.29631e-02
## 2 1002 OKI 0.310824 2.01057e-02 8.33858e-02 1.82188e-02 2.74903e-02 2.10350e-02
## 3 1003 OKI 0.989852 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1004 OKI 0.844984 9.99280e-05 3.00413e-04 9.99280e-05 6.98909e-03 9.99280e-05
## 5 1005 OKI 0.878223 9.99262e-05 9.99262e-05 9.99262e-05 1.73929e-02 4.21508e-03
## 6 1006 OKI 0.998103 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##            v7          v8          v9         v10         v11         v12
## 1 7.47906e-02 7.89411e-03 5.27420e-02 3.98423e-02 8.68477e-03 1.01131e-02
## 2 1.22019e-01 9.99730e-05 4.56474e-02 1.26797e-02 2.23758e-02 9.99730e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 2.15622e-02 6.00394e-02 9.99280e-05 1.41907e-02 3.47921e-03 1.23783e-02
## 5 1.22851e-02 5.26514e-02 2.50057e-03 1.16512e-02 4.90798e-03 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##           v13         v14         v15         v16         v17         v18
## 1 9.37350e-02 4.03754e-02 1.32796e-02 2.91868e-02 1.85944e-03 5.56498e-02
## 2 1.60111e-01 5.60752e-02 2.10341e-02 8.74184e-03 1.79302e-02 4.49698e-02
## 3 9.98380e-05 9.98380e-05 8.35050e-03 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.11324e-02 1.59733e-03 8.88493e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 5 4.65299e-03 9.99262e-05 9.99262e-05 9.99262e-05 5.12692e-04 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##           v19         v20
## 1 1.78205e-03 4.34181e-03
## 2 7.05643e-03 9.99730e-05
## 3 9.98380e-05 9.98380e-05
## 4 1.36630e-02 9.99280e-05
## 5 1.01074e-02 9.99262e-05
## 6 9.98289e-05 9.98289e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

2.1.3 Plot individual admixture for K=20

source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak20 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

color_palette_20 <-
  c(
    "#FF8C1A",
    "#B20CC9",
    "#77DD77",
    "#1E90FF",
    "#B22222",
    "green",
    "yellow2",
    "#F49AC2",
    "blue",
    "#008080", 
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "#AE8333",
    "purple",
    "orangered"
  )

# Generate all potential variable names
all_variables <- paste0("v", 1:20)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=20.\n LEA inference for k20 with 56,384 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_20) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=20_euro_global_r2_1_run1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

2.2 K=5 Plots

Try plotting k=5 (See if the 5 original Asian clusters hold)

Summary of project

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      4      4      4
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         4      4      4      4      4      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      4      4      4      4      4      4      4      4
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         4      4      4      4      4
## without cross-entropy      0      0      0      0      0
## total                      4      4      4      4      4
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max  0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max  0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max  0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max  0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
##         K = 29    K = 30
## min  0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max  0.8047952 0.8044480
# get the cross-entropy of all runs for K = 5
ce = cross.entropy(project, K = 5)
ce #run 1 is best for k=5
##           K = 5
## run 1 0.8202643
## run 2 0.8228964
## run 3 0.8206233
## run 4 0.8212199
## run 5 0.8221128

choose best run here

leak5 <- read_delim(
  here("euro_global/output/snps_sets/r2_0.1.snmf/K5/run1/r2_0.1_r1.5.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 

head(leak5)
## # A tibble: 6 × 5
##         X1    X2    X3     X4    X5
##      <dbl> <dbl> <dbl>  <dbl> <dbl>
## 1 0.0100   0.432 0.201 0.137  0.220
## 2 0.0200   0.435 0.204 0.121  0.220
## 3 0.0157   0.464 0.175 0.147  0.198
## 4 0.000100 0.452 0.226 0.0883 0.233
## 5 0.000100 0.462 0.199 0.109  0.230
## 6 0.0234   0.457 0.192 0.116  0.211

The fam file

fam_file <- here(
  "euro_global/output/snps_sets/r2_0.1.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak5 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak5)

head(leak5)
##    ind pop         X1       X2       X3        X4       X5
## 1 1001 OKI 0.01004940 0.432031 0.201044 0.1370630 0.219812
## 2 1002 OKI 0.01995590 0.434927 0.203757 0.1211740 0.220186
## 3 1003 OKI 0.01567250 0.464427 0.174829 0.1474360 0.197636
## 4 1004 OKI 0.00009999 0.452051 0.226127 0.0883153 0.233406
## 5 1005 OKI 0.00009999 0.462282 0.199097 0.1087080 0.229812
## 6 1006 OKI 0.02340520 0.457148 0.191711 0.1164300 0.211305

Rename the columns

# Rename the columns starting from the third one
leak5 <- leak5 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak5)
##    ind pop         v1       v2       v3        v4       v5
## 1 1001 OKI 0.01004940 0.432031 0.201044 0.1370630 0.219812
## 2 1002 OKI 0.01995590 0.434927 0.203757 0.1211740 0.220186
## 3 1003 OKI 0.01567250 0.464427 0.174829 0.1474360 0.197636
## 4 1004 OKI 0.00009999 0.452051 0.226127 0.0883153 0.233406
## 5 1005 OKI 0.00009999 0.462282 0.199097 0.1087080 0.229812
## 6 1006 OKI 0.02340520 0.457148 0.191711 0.1164300 0.211305

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

Default plot

color_palette_5 <-
  c(
    "#FF8C1A",
    "#77DD37",
    "#1E90FF",
    "purple3",
    "#FFFF19"
     )

# select the best run for K = 15 clusters
best = which.min(cross.entropy(project, K = 5))
# best is run 4

barchart(project, K = 5, run = best,
        border = NA, space = 0,
        col = color_palette_5,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

2.2.1 Mean admixture by country for K=5

color_palette_5 <-
  c(
    "#FF8C1A",
    "#77DD37",
    "#1E90FF",
    "purple3",
    "#FFFF19"
  )

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 5, run = best))


# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns", 
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:5)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_5[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=5.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) +  # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_5)

#   ____________________________________________________________________________
#save the plot                                                       ####
 ggsave(
  here("scripts", "RMarkdowns", 
    "output", "euro_global", "lea", "LEA_bycountry_k=5_euro_global_r1.pdf"
   ),
   width  = 12,
   height = 6,
   units  = "in",
   device = cairo_pdf
 )

2.2.2 Plot individual admixture for K=5

source(
  here("scripts", "RMarkdowns", 
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak5 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 


color_palette_5 <-
  c(
    "#FF8C1A",
    "#77DD37",
    "#1E90FF",
    "purple3",
    "#FFFF19"
     )


# Generate all potential variable names
all_variables <- paste0("v", 1:5)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_5[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 10
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k5 with 56,384 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_5) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=5_euro_global_r2_1_run1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

2.3 K=16 Plots

for SNP Set 2 (r2<0.1)

Summary of project check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      4      4      4
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         4      4      4      4      4      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      4      4      4      4      4      4      4      4
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         4      4      4      4      4
## without cross-entropy      0      0      0      0      0
## total                      4      4      4      4      4
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max  0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max  0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max  0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max  0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
##         K = 29    K = 30
## min  0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max  0.8047952 0.8044480
# get the cross-entropy of all runs for K = 16
ce = cross.entropy(project, K = 16)
ce #run 1 is best for k=
##          K = 16
## run 1 0.8059477
## run 2 0.8064777
## run 3 0.8061023
## run 4 0.8071925
color_palette_16 <-
  c(
    "#FF8C1A",
    "#B20CC9",
    "#77DD77",
    "#1E90FF",
    "#B22222",
    "green",
    "yellow2",
    "#F49AC2",
    "blue",
    "#FFFF99",
    "#75FAFF",
    "magenta",
    "green4",
    "navy", 
    "purple",
    "orangered"
  )

2.3.1 Mean admixture by country for K=16

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 16, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:20)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_16[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=16.") +
 #scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_16)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k16_r1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

2.3.2 Plot individual admixtures for K=16

Extract ancestry coefficients for k=16 change to correct matrix

leak16 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K16/run1/r2_0.1_r1.16.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak16)
## # A tibble: 6 × 16
##         X1      X2      X3      X4      X5    X6      X7      X8      X9     X10
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  2.37e-2 8.74e-2 4.86e-2 9.70e-3 2.66e-2 0.488 1.74e-2 1.10e-2 6.44e-2 6.09e-3
## 2  2.38e-2 1.64e-1 2.72e-2 2.06e-2 9.94e-2 0.349 1.34e-2 5.74e-2 5.37e-2 1.28e-2
## 3  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.994 9.99e-5 3.45e-3 9.99e-5 9.99e-5
## 4  1.56e-2 1.97e-2 9.99e-5 5.05e-2 9.99e-5 0.864 9.99e-5 9.99e-5 4.52e-3 9.99e-5
## 5  1.01e-2 9.05e-3 3.70e-3 4.65e-2 9.99e-5 0.895 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 6  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 6 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak16 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak16)

head(leak16)
##    ind pop          X1          X2          X3          X4          X5       X6
## 1 1001 OKI 2.36683e-02 8.73786e-02 4.85907e-02 9.70212e-03 2.66047e-02 0.487992
## 2 1002 OKI 2.37829e-02 1.63880e-01 2.72121e-02 2.05985e-02 9.93526e-02 0.348696
## 3 1003 OKI 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 0.993884
## 4 1004 OKI 1.55944e-02 1.97235e-02 9.99280e-05 5.05453e-02 9.99280e-05 0.864136
## 5 1005 OKI 1.00754e-02 9.05345e-03 3.70392e-03 4.64856e-02 9.99280e-05 0.894758
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 0.998502
##            X7          X8          X9         X10         X11         X12
## 1 1.74042e-02 1.09914e-02 6.43721e-02 6.08818e-03 7.88133e-02 5.26037e-02
## 2 1.34422e-02 5.73794e-02 5.37438e-02 1.27509e-02 6.08909e-02 8.98258e-02
## 3 9.98830e-05 3.45188e-03 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05
## 4 9.99280e-05 9.99280e-05 4.52242e-03 9.99280e-05 9.81981e-03 1.59261e-02
## 5 9.99280e-05 9.99280e-05 9.99280e-05 9.99280e-05 2.06442e-02 9.99280e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
##           X13         X14         X15         X16
## 1 9.90955e-03 4.10494e-03 5.79049e-02 1.38716e-02
## 2 3.08931e-03 1.28189e-02 9.42796e-03 3.10916e-03
## 3 9.98830e-05 1.36576e-03 9.98830e-05 9.98830e-05
## 4 1.89333e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1.08838e-02 9.99280e-05 9.99280e-05 3.59637e-03
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05

Rename the columns

# Rename the columns starting from the third one
leak16 <- leak16 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak16)
##    ind pop          v1          v2          v3          v4          v5       v6
## 1 1001 OKI 2.36683e-02 8.73786e-02 4.85907e-02 9.70212e-03 2.66047e-02 0.487992
## 2 1002 OKI 2.37829e-02 1.63880e-01 2.72121e-02 2.05985e-02 9.93526e-02 0.348696
## 3 1003 OKI 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 0.993884
## 4 1004 OKI 1.55944e-02 1.97235e-02 9.99280e-05 5.05453e-02 9.99280e-05 0.864136
## 5 1005 OKI 1.00754e-02 9.05345e-03 3.70392e-03 4.64856e-02 9.99280e-05 0.894758
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 0.998502
##            v7          v8          v9         v10         v11         v12
## 1 1.74042e-02 1.09914e-02 6.43721e-02 6.08818e-03 7.88133e-02 5.26037e-02
## 2 1.34422e-02 5.73794e-02 5.37438e-02 1.27509e-02 6.08909e-02 8.98258e-02
## 3 9.98830e-05 3.45188e-03 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05
## 4 9.99280e-05 9.99280e-05 4.52242e-03 9.99280e-05 9.81981e-03 1.59261e-02
## 5 9.99280e-05 9.99280e-05 9.99280e-05 9.99280e-05 2.06442e-02 9.99280e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
##           v13         v14         v15         v16
## 1 9.90955e-03 4.10494e-03 5.79049e-02 1.38716e-02
## 2 3.08931e-03 1.28189e-02 9.42796e-03 3.10916e-03
## 3 9.98830e-05 1.36576e-03 9.98830e-05 9.98830e-05
## 4 1.89333e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1.08838e-02 9.99280e-05 9.99280e-05 3.59637e-03
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak16 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

color_palette_16 <-
  c(
    "#FF8C1A",
    "#B20CC9",
    "#F49AC2",
    "#1E90FF",
    "#B22222",
    "green",
    "yellow2",
    "#77DD77",
    "blue",
    "#FFFF99",
    "#75FAFF",
    "magenta",
    "green4",
    "navy", 
    "purple",
    "orangered"
  )

# Generate all potential variable names
all_variables <- paste0("v", 1:16)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_16[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=16.\n LEA inference for k16 with 56,384 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_16) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=16_euro_global_r2_1_run1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

2.4 K=22 Plots

for LEA SNP Set 2 (r2<0.1)

Extract ancestry coefficients for k=22

color_palette_22 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#77DD77",
    "#1E90FF",
    "#B22222",
    "chocolate4",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "#008080", 
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "green",
    "purple",
    "orangered2",
    "goldenrod3",
    "coral")

Default plot

# select the best run for chosen K 
best = which.min(cross.entropy(project, K = 22))
# best is run 1

barchart(project, K = 22, run = best,
        border = NA, space = 0,
        col = color_palette_22,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

2.4.1 Mean admixture by country using ggplot

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 22, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:20)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_22[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=22.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_20)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k22_r1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)
summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      4      4      4
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         4      4      4      4      4      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      4      4      4      4      4      4      4      4
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         4      4      4      4      4
## without cross-entropy      0      0      0      0      0
## total                      4      4      4      4      4
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max  0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max  0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max  0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max  0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
##         K = 29    K = 30
## min  0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max  0.8047952 0.8044480
# get the cross-entropy of all runs for K = 16
ce = cross.entropy(project, K = 22)
ce #run 3 is best for k=22
##          K = 22
## run 1 0.8042983
## run 2 0.8043288
## run 3 0.8041131
## run 4 0.8050793

2.4.2 Plot individual admixtures for k=22

Extract ancestry coefficients for k=22 change to correct matrix

leak22 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K22/run3/r2_0.1_r3.22.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak22)
## # A tibble: 6 × 22
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.00433   0.0240    0.0000999  1.38e-3 2.26e-3 1.12e-2 5.04e-2 1.53e-2 6.20e-2
## 2 0.000100  0.0305    0.0136     5.11e-3 2.66e-2 1.58e-3 4.62e-2 1.00e-2 4.42e-2
## 3 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 1.41e-4 9.98e-5
## 4 0.0000999 0.0000999 0.000972   9.99e-5 9.99e-5 4.85e-2 9.99e-5 9.99e-5 9.99e-5
## 5 0.0000999 0.00648   0.0000999  9.99e-5 1.58e-2 3.37e-2 9.99e-5 9.99e-5 9.99e-5
## 6 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 13 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>,
## #   X21 <dbl>, X22 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak22 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak22)

head(leak22)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 4.32608e-03 2.39781e-02 9.99460e-05 1.37750e-03 2.25550e-03
## 2 1002 OKI 9.99820e-05 3.04932e-02 1.35758e-02 5.11275e-03 2.65567e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.72357e-04 9.99010e-05 9.99010e-05
## 5 1005 OKI 9.98740e-05 6.48490e-03 9.98740e-05 9.98740e-05 1.58013e-02
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##            X6          X7          X8          X9         X10         X11
## 1 1.11820e-02 5.03599e-02 1.52698e-02 6.19554e-02 1.79845e-02 8.70910e-02
## 2 1.58466e-03 4.61989e-02 9.99813e-03 4.42420e-02 2.45108e-02 2.75577e-02
## 3 9.98201e-05 9.98201e-05 1.41240e-04 9.98201e-05 9.98201e-05 9.98201e-05
## 4 4.85039e-02 9.99010e-05 9.99010e-05 9.99010e-05 1.15443e-02 9.99010e-05
## 5 3.36562e-02 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 6.80740e-03
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           X12         X13         X14         X15         X16         X17
## 1 6.32903e-02 9.99460e-05 9.99460e-05 9.99460e-05 9.99460e-05 4.03509e-02
## 2 7.93744e-02 3.87164e-03 2.38563e-03 9.99820e-05 8.19030e-02 2.66488e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 8.45930e-03 9.99010e-05 2.28579e-03 2.34025e-02 4.88334e-03 9.99010e-05
## 5 9.98740e-05 9.98740e-05 9.98740e-05 3.36100e-02 9.98740e-05 9.98740e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           X18         X19      X20         X21         X22
## 1 1.75520e-02 7.57056e-02 0.484006 4.27157e-02 9.99460e-05
## 2 3.89836e-02 1.49530e-01 0.333537 5.07033e-02 3.03177e-03
## 3 9.98201e-05 9.98201e-05 0.997862 9.98201e-05 9.98201e-05
## 4 9.99010e-05 1.85123e-02 0.840190 2.36356e-02 1.65116e-02
## 5 9.98740e-05 2.18772e-02 0.872565 7.79989e-03 9.98740e-05
## 6 9.98108e-05 9.98108e-05 0.997904 9.98108e-05 9.98108e-05

Rename the columns

# Rename the columns starting from the third one
leak22 <- leak22 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak22)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 4.32608e-03 2.39781e-02 9.99460e-05 1.37750e-03 2.25550e-03
## 2 1002 OKI 9.99820e-05 3.04932e-02 1.35758e-02 5.11275e-03 2.65567e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.72357e-04 9.99010e-05 9.99010e-05
## 5 1005 OKI 9.98740e-05 6.48490e-03 9.98740e-05 9.98740e-05 1.58013e-02
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##            v6          v7          v8          v9         v10         v11
## 1 1.11820e-02 5.03599e-02 1.52698e-02 6.19554e-02 1.79845e-02 8.70910e-02
## 2 1.58466e-03 4.61989e-02 9.99813e-03 4.42420e-02 2.45108e-02 2.75577e-02
## 3 9.98201e-05 9.98201e-05 1.41240e-04 9.98201e-05 9.98201e-05 9.98201e-05
## 4 4.85039e-02 9.99010e-05 9.99010e-05 9.99010e-05 1.15443e-02 9.99010e-05
## 5 3.36562e-02 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 6.80740e-03
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           v12         v13         v14         v15         v16         v17
## 1 6.32903e-02 9.99460e-05 9.99460e-05 9.99460e-05 9.99460e-05 4.03509e-02
## 2 7.93744e-02 3.87164e-03 2.38563e-03 9.99820e-05 8.19030e-02 2.66488e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 8.45930e-03 9.99010e-05 2.28579e-03 2.34025e-02 4.88334e-03 9.99010e-05
## 5 9.98740e-05 9.98740e-05 9.98740e-05 3.36100e-02 9.98740e-05 9.98740e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           v18         v19      v20         v21         v22
## 1 1.75520e-02 7.57056e-02 0.484006 4.27157e-02 9.99460e-05
## 2 3.89836e-02 1.49530e-01 0.333537 5.07033e-02 3.03177e-03
## 3 9.98201e-05 9.98201e-05 0.997862 9.98201e-05 9.98201e-05
## 4 9.99010e-05 1.85123e-02 0.840190 2.36356e-02 1.65116e-02
## 5 9.98740e-05 2.18772e-02 0.872565 7.79989e-03 9.98740e-05
## 6 9.98108e-05 9.98108e-05 0.997904 9.98108e-05 9.98108e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak22 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

color_palette_22 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#77DD77",
    "chocolate4",
    "#B22222",
    "purple",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "#1E90FF",
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "green",
    "#008080",
    "goldenrod3",
    "coral",
    "orangered2"
    )

# Generate all potential variable names
all_variables <- paste0("v", 1:22)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_22[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=22.\n LEA inference for k22 with 56,384 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_22) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=22_euro_global_r2_1_run3.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

2.5 Plots for k=23

LEA SNP Set 2 (r2<0.1)

Extract ancestry coefficients for k=23 LEA r2<0.1 change to correct matrix

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      4      4      4
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         4      4      4      4      4      4      4      4
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      4      4      4      4      4      4      4      4
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         4      4      4      4      4
## without cross-entropy      0      0      0      0      0
## total                      4      4      4      4      4
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max  0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max  0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max  0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max  0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
##         K = 29    K = 30
## min  0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max  0.8047952 0.8044480
# get the cross-entropy of all runs for K = 23
ce = cross.entropy(project, K = 23)
ce #run 1 is best for k=23
##          K = 23
## run 1 0.8041347
## run 2 0.8046587
## run 3 0.8050589
## run 4 0.8048428

2.5.1 Extract ancetry coefficients for K=23

leak23 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K23/run1/r2_0.1_r1.23.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak23)
## # A tibble: 6 × 23
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.0128    0.0167    0.00201    1.00e-4 4.82e-2 2.09e-2 4.73e-2 1.46e-2 7.80e-2
## 2 0.0206    0.0877    0.00711    1.00e-4 1.39e-2 4.46e-2 3.96e-2 4.61e-2 4.25e-2
## 3 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 0.00999   0.0000999 0.0000999  1.58e-2 4.36e-2 4.53e-2 9.99e-5 9.99e-5 9.99e-5
## 5 0.0119    0.0000999 0.0000999  9.99e-5 3.63e-2 4.11e-2 1.64e-2 9.99e-5 9.99e-5
## 6 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 14 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>,
## #   X21 <dbl>, X22 <dbl>, X23 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak23 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak23)

head(leak23)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 1.27639e-02 1.67423e-02 2.00565e-03 9.99550e-05 4.81760e-02
## 2 1002 OKI 2.05636e-02 8.77317e-02 7.10981e-03 9.99820e-05 1.38512e-02
## 3 1003 OKI 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 1004 OKI 9.99118e-03 9.98830e-05 9.98830e-05 1.58279e-02 4.36386e-02
## 5 1005 OKI 1.18692e-02 9.98740e-05 9.98740e-05 9.98740e-05 3.62593e-02
## 6 1006 OKI 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
##            X6          X7          X8          X9         X10         X11
## 1 2.08513e-02 4.72904e-02 1.46362e-02 7.79676e-02 1.24348e-02 1.20796e-02
## 2 4.45759e-02 3.96060e-02 4.60880e-02 4.24541e-02 1.14773e-02 9.93311e-03
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 4.52657e-02 9.98830e-05 9.98830e-05 9.98830e-05 1.31127e-02 4.39800e-04
## 5 4.11295e-02 1.64199e-02 9.98740e-05 9.98740e-05 1.08600e-02 9.98740e-05
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
##           X12      X13         X14         X15         X16         X17
## 1 9.99550e-05 0.468524 4.30887e-02 9.99550e-05 6.54457e-02 4.49180e-02
## 2 7.31831e-04 0.316644 3.43821e-02 1.79289e-02 9.94290e-02 1.10625e-02
## 3 9.98021e-05 0.997804 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 0.824120 9.98830e-05 9.98830e-05 3.81714e-02 9.98830e-05
## 5 4.26943e-03 0.862749 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 6 9.98019e-05 0.997804 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
##           X18         X19         X20         X21         X22         X23
## 1 9.99550e-05 2.37160e-03 9.99550e-05 1.91372e-02 1.77211e-02 7.33465e-02
## 2 2.15107e-02 1.24573e-02 6.39485e-03 9.99820e-05 1.84864e-02 1.37382e-01
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 9.98830e-05 1.60094e-03 9.98830e-05 9.98830e-05 6.53375e-03
## 5 9.98740e-05 9.98740e-05 3.72111e-03 9.98740e-05 9.98740e-05 1.13240e-02
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05

Rename the columns

# Rename the columns starting from the third one
leak23 <- leak23 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak23)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 1.27639e-02 1.67423e-02 2.00565e-03 9.99550e-05 4.81760e-02
## 2 1002 OKI 2.05636e-02 8.77317e-02 7.10981e-03 9.99820e-05 1.38512e-02
## 3 1003 OKI 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 1004 OKI 9.99118e-03 9.98830e-05 9.98830e-05 1.58279e-02 4.36386e-02
## 5 1005 OKI 1.18692e-02 9.98740e-05 9.98740e-05 9.98740e-05 3.62593e-02
## 6 1006 OKI 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
##            v6          v7          v8          v9         v10         v11
## 1 2.08513e-02 4.72904e-02 1.46362e-02 7.79676e-02 1.24348e-02 1.20796e-02
## 2 4.45759e-02 3.96060e-02 4.60880e-02 4.24541e-02 1.14773e-02 9.93311e-03
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 4.52657e-02 9.98830e-05 9.98830e-05 9.98830e-05 1.31127e-02 4.39800e-04
## 5 4.11295e-02 1.64199e-02 9.98740e-05 9.98740e-05 1.08600e-02 9.98740e-05
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
##           v12      v13         v14         v15         v16         v17
## 1 9.99550e-05 0.468524 4.30887e-02 9.99550e-05 6.54457e-02 4.49180e-02
## 2 7.31831e-04 0.316644 3.43821e-02 1.79289e-02 9.94290e-02 1.10625e-02
## 3 9.98021e-05 0.997804 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 0.824120 9.98830e-05 9.98830e-05 3.81714e-02 9.98830e-05
## 5 4.26943e-03 0.862749 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 6 9.98019e-05 0.997804 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
##           v18         v19         v20         v21         v22         v23
## 1 9.99550e-05 2.37160e-03 9.99550e-05 1.91372e-02 1.77211e-02 7.33465e-02
## 2 2.15107e-02 1.24573e-02 6.39485e-03 9.99820e-05 1.84864e-02 1.37382e-01
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 9.98830e-05 1.60094e-03 9.98830e-05 9.98830e-05 6.53375e-03
## 5 9.98740e-05 9.98740e-05 3.72111e-03 9.98740e-05 9.98740e-05 1.13240e-02
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

2.5.2 Plot individual admixtures for K=23

source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak23 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

color_palette_23 <-
  c(
    "purple4",    
    "blue",
    "green4",     
    "#FF8C1A",
    "green",    
    "#AE9393",
    "#1E90FF",    
    "yellow2",
    "#008080", 
    "purple",   
    "#FFFF99",
    "magenta",
    "navy", 
    "orangered",
    "#B20CC9",      
    "orchid1",
    "coral",
    "#75FAFF",
    "#B22222",
    "goldenrod",    
    "#F49AC2",
    "#77DD77",
    "chocolate4"        
  )
  

# Generate all potential variable names
all_variables <- paste0("v", 1:23)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_23[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=23.\n LEA inference for k23 with 56,384 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_23) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=23_euro_global_r2_1_run1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

3 LEA for SNP Set 1 (r<0.01)) dataset for Europe_global

3.1 Check and import the data

Check data - we created vcf file with LD pruning r2<0.01 (Set 1): r2_0.01.vcf

ls euro_global/output/snps_sets/*.vcf
## euro_global/output/snps_sets/neutral.vcf
## euro_global/output/snps_sets/r2_0.01.vcf
## euro_global/output/snps_sets/r2_0.01_b.vcf
## euro_global/output/snps_sets/r2_0.1.vcf
## euro_global/output/snps_sets/r2_0.1_b.vcf

Import the data for r2<0.01

genotype <- here(
   "euro_global/output/snps_sets/r2_0.01.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 19318
##   column count: 697
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 19318
##   Character matrix gt cols: 697
##   skip: 0
##   nrows: 19318
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant: 19318
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS GEL GES GRA GRC GRV 
##  10  12  12  10  12  12  12  13  10  12  14  12  12  16  12   2  12  11  10  12 
## HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG KAN KAT KER KLP KRA KUN LAM 
##  12   4   7  12   4  11   4   5   9  12   2   6  12  11   6  12   4  12   4   9 
## MAL MAT OKI PAL POL POP QNC RAR REC ROM ROS SER SEV SIC SLO SOC SON SPB SPC SPM 
##  12  12  12  11   2  12  11  12  11   4  11   4  12   9  12  12   3   8   6   5 
## SPS SSK STS SUF SUU TAI TIK TIR TRE TUA TUH UTS YUN 
##   8  12  12   6   6   7  12   4  12   9  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   688
##  - number of detected loci:      19318
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   688
##  - number of detected loci:      19318
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.removed file, for more informations.
## 
## 
##  - number of detected individuals:   688
##  - number of detected loci:      19318
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.lfmm"

PCA for SNP Set 1

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          688
##         -L (number of loci)                 19318
##         -K (number of principal components) 688
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/ 
## pca result directory:            r2_0.01.pca/ 
## input file:                      r2_0.01.lfmm 
## eigenvalue file:                 r2_0.01.eigenvalues 
## eigenvector file:                r2_0.01.eigenvectors 
## standard deviation file:         r2_0.01.sdev 
## projection file:                 r2_0.01.projections 
## pcaProject file:                   r2_0.01.pcaProject 
## number of individuals:           688 
## number of loci:                  19318 
## number of principal components:  688 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          688
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Check R symbols for plot

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
  scale_shape_manual(values=good.shapes[1:N]) +
  geom_point()

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "sampling_loc_all.csv"
    ))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", 
  "output", "sampling_loc_all.rds"
))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_all.rds"))
head(sampling_loc)
##       Pop_City    Country  Latitude Longitude Continent Abbreviation
## 1  Franceville      Gabon  -1.59207  13.53242    Africa          GAB
## 2 Antananarivo Madagascar -18.87920  47.50790    Africa          ANT
## 3  Diego ville Madagascar -12.27361  49.29372    Africa          DGV
## 4    Morondava Madagascar -20.28420  44.27940    Africa          MAD
## 5     Vohimasy Madagascar -22.81591  47.75026    Africa          VOH
## 6      Dauguet  Mauritius -20.18530  57.52154    Africa          DAU
##          Year         Region    Subregion order
## 1        2015 Central Africa       Africa    72
## 2        2022    East Africa  East Africa    76
## 3        2022    East Africa  East Africa    77
## 4        2016    East Africa  East Africa    78
## 5 2016 & 2017    East Africa  East Africa    79
## 6        2022   Indian Ocean Indian Ocean    80

More sample data

# import sample attributes
samples2 <- read.delim(
  here("scripts", "RMarkdowns", 
    "output", "Population_meta_data.txt"
  ),
  head = TRUE
)

samples2<- samples2 |>
  dplyr::select(
    region, pop
  )

# check head of the file
head(samples2)
##           region pop
## 1 Central Africa GAB
## 2    East Africa ANT
## 3    East Africa MAD
## 4    East Africa DGV
## 5    East Africa VOH
## 6   Indian Ocean DAU

Merge with sampling_loc

merged_loc <- merge(samples2, sampling_loc, by.x = "pop", by.y = "Abbreviation")
head(merged_loc)
##   pop          region           Pop_City    Country  Latitude Longitude
## 1 AIZ       East Asia Aizuwakamatsu City      Japan  37.49240 139.99360
## 2 ALD Southern Europe             Durres    Albania  41.29704  19.50373
## 3 ALU  Eastern Europe            Alushta    Ukraine  44.68289  34.40368
## 4 ALV Southern Europe              Vlore    Albania  40.46600  19.48970
## 5 ANT     East Africa       Antananarivo Madagascar -18.87920  47.50790
## 6 ARM  Eastern Europe             Ijevan    Armenia  40.87971  45.14764
##   Continent Year          Region   Subregion order
## 1      Asia 2008       East Asia                NA
## 2    Europe 2018 Southern Europe East Europe    25
## 3    Europe 2021  Eastern Europe East Europe    35
## 4    Europe 2020 Southern Europe East Europe    24
## 5    Africa 2022     East Africa East Africa    76
## 6    Europe 2020  Eastern Europe East Europe    42

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 73

Check the regions

unique(merged_loc$region)
##  [1] "East Asia"       "Southern Europe" "Eastern Europe"  "East Africa"    
##  [5] "West Africa"     "North America"   "South Asia"      "Caribbean"      
##  [9] "Southeast Asia"  "Indian Ocean"    "Western Europe"  "Central Africa" 
## [13] "South America"   "North Africa"

Merge

merged_data <- merge(pc.coord, merged_loc, by.x = "Population", by.y = "pop")
head(merged_data)
##   Population     PC1     PC2     PC3      PC4      PC5      PC6      PC7
## 1        ALD 11.3046 3.85130 19.7833 1.687230 -31.9742 -7.03582  2.87450
## 2        ALD 11.3286 4.09234 21.6678 2.240930 -35.6509 -6.67780  1.30074
## 3        ALD 10.9877 2.96010 27.1215 1.722600 -27.0213 -8.93229  4.56509
## 4        ALD 12.5399 3.70106 23.5571 1.923170 -32.8265 -4.32701 -0.54051
## 5        ALD 10.6210 6.13863 18.5964 0.106578 -28.5055 -4.24365 -2.14243
## 6        ALD 10.3700 3.24124 23.0999 0.624742 -34.5679 -8.77115 -1.64540
##        PC8       PC9       PC10      PC11      PC12     PC13       PC14
## 1  8.76051 -2.390600 -1.1133700  1.409550 -2.432570  2.21439  0.0831294
## 2 10.53680  0.572231 -0.0447769  5.148600  1.885100  6.72702  0.3359080
## 3 12.34000  2.083830 -2.5534900  5.418010 -3.295510  6.81957 -2.2182600
## 4  8.67068  3.930570 -1.8172500 -0.958173  3.130710  1.46826  3.9380100
## 5  6.97025  1.488870 -2.2270600  2.202370 -1.782360 -3.09520 -2.3885500
## 6  8.84329 -1.938790 -1.5921900  6.365010  0.288914  5.53801 -2.6206200
##        PC15     PC16      PC17      PC18      PC19     PC20      PC21     PC22
## 1  1.591980 -3.89981  0.846729 -1.512010  1.275050  5.20659  0.355472  6.59573
## 2  2.506320 -1.82679  4.219520 -2.663210  0.722739  6.76211  4.321160  5.56285
## 3 -0.176634 -3.29138  3.994920 -3.303390 -3.444090  4.72517  2.293660  1.81743
## 4  1.702520  1.34122 -0.943790  0.525896 -0.857543 -2.31975 -2.554890 -1.97683
## 5 -0.245045 -4.12400  2.278240 -4.605470  4.697470  1.25214  3.550810  7.89109
## 6  3.673660 -1.73236  1.017960 -2.620330 -0.748756  4.38833 -0.176724  5.28283
##         PC23      PC24      PC25      PC26      PC27     PC28     PC29     PC30
## 1  3.2310000 -3.782110  0.343321  3.452750 -0.397645  8.59238 -6.38829  2.67176
## 2  0.1545930 -0.973830  0.824802  5.002070 -1.803110  7.84339 -2.56195  2.51685
## 3 -0.3103470  0.104312  0.789867  5.413510 -3.317490 11.40880 -3.64877  3.30967
## 4  0.0302859 -0.871619  2.124920  5.503500  0.533652 -1.33150 -3.21115  2.07990
## 5  5.7531100  2.564260 -1.794590 -0.455631 -0.109703 12.61800  7.14574 -8.21520
## 6  2.0990500 -0.298802 -0.988705  7.396710  0.401363  5.97424 -9.19427 -3.69260
##       PC31      PC32      PC33    PC34     PC35     PC36     PC37     PC38
## 1  6.86863 -1.089500  -4.44339 6.67840 -3.42491 -4.82034  8.93957 -4.88268
## 2  7.74545  2.853570  -3.56519 4.42324 -1.18201 -3.85741  4.39299 -6.69470
## 3  8.60304  6.357500  -7.46373 4.15771 -4.83301 -2.46457  7.54084 -8.38108
## 4 -4.69375 -0.207922   2.10898 2.65389  1.70503  5.73929 -4.27603 -3.66764
## 5 -6.60411  4.734720 -12.04430 4.07524 -3.22002 -2.26138  4.88504 -6.55756
## 6  4.72237 -0.926283  -3.27779 5.47676 -1.57894 -8.10418  7.07028  7.59312
##       PC39     PC40      PC41      PC42      PC43     PC44     PC45       PC46
## 1 -6.78503 -4.42121   3.59483 -2.665010 -3.630720 15.46030 -3.96144 -6.4089000
## 2 -7.26412 -5.50867   7.42830 -2.154760 -0.773832  7.63320 -1.18746 -6.5392000
## 3 -5.62695 -5.01390   8.26299  1.203870  0.940659  1.56725  1.96505  0.5932230
## 4 -8.27641 -2.11220   7.04648 -2.259360  3.718930  1.64267 -7.67578 -0.5220810
## 5 -2.90173  0.32610  -3.06623 -0.994856 -8.462040  9.02437  1.05418  0.0444795
## 6  2.13255  8.45184 -12.27810  3.350630  0.377129 13.13020 -2.84936 -3.6098600
##       PC47      PC48      PC49      PC50     PC51       PC52      PC53
## 1 -3.71097 -1.082220 -2.766790  7.672670 12.30180 -3.1054600  1.924130
## 2 -2.01765  1.189410 -5.822020  4.647830  4.41900  0.0153929 -0.433726
## 3  3.39743  0.471181 -5.686080  0.208139  1.09238  0.1296940  7.671860
## 4  7.34806  0.639311 -7.172720 -1.552730  1.99029 -1.8731700 -3.060680
## 5 -5.90926  4.810760  1.229390  1.441610  6.24433 -0.0726317  0.420500
## 6 -4.74439  8.944300 -0.487939 -3.048420  2.38544  0.9313740 -7.630770
##        PC54      PC55      PC56      PC57       PC58      PC59     PC60
## 1 -0.683836  3.345480 17.363200 -1.485730 -0.0259309   1.56506  1.23083
## 2 -4.033930  2.998030 13.485500 -4.554240  2.5985500   3.97646 -2.31647
## 3 -3.867050 -0.788444 11.591500 -0.836219  2.9079800   4.70301  1.73630
## 4  4.549290  1.211220 -0.138582 -1.870110 -0.9434830 -12.62790 -5.61055
## 5  0.181434  4.545250  3.638140 -0.995323 -2.4282200  -3.61029  8.37504
## 6 -5.363230 -2.720780 -2.805080  5.570260 -0.5126940   8.06162  3.68012
##        PC61      PC62      PC63      PC64      PC65      PC66      PC67
## 1  12.36460   8.14968  4.424100  0.675101 -4.946100  0.666866 -0.539953
## 2   7.21885   5.87628  2.239920 -5.329600 -4.809330  0.698679 -3.867710
## 3  10.40760   5.37401  5.918490 -2.324100 -5.898300 -4.353950 -3.038200
## 4   3.67099   2.43420  0.184676  2.806220 -6.231890 -3.045850  1.087490
## 5   4.05643   1.76671  6.028750  1.079830 -0.638482 -0.808931 -8.573490
## 6 -12.93130 -12.34680 -1.453650 -9.402500 25.534300  2.088730  3.960290
##        PC68     PC69      PC70      PC71      PC72        PC73     PC74
## 1  3.626120  3.15212  3.178980 -0.322655   8.34466  -0.0736454  3.20536
## 2  6.192960 -1.63845  5.300660 -6.972940  10.38190  -3.4281300  2.21707
## 3  0.790238 -1.59328 10.640200 -4.497410   7.29813  -3.0353400  5.61416
## 4 -0.308131 -3.43526 -4.851840 -4.036010   8.68017  -4.5135500 -4.00738
## 5  4.745270  6.12391  0.487623 -5.248270   2.22806   4.2850400 -1.88139
## 6  5.207100  4.08905 -9.177160 14.790700 -10.49410 -14.7545000 -1.28016
##        PC75       PC76      PC77      PC78       PC79      PC80      PC81
## 1  1.560740 -1.1318000  3.079280 -2.294070   0.330213 -0.849971   0.23262
## 2  2.419960 -3.5366100 -2.952500 -0.300429   1.180380 -3.288850   2.46454
## 3  4.474900 -3.2787500  2.369970 -4.133800   6.488080 -0.821653  -1.46916
## 4 -2.897600  0.0746116 -6.816260 -1.939580   4.969250 -7.470980   1.76947
## 5 -0.452728 -3.6357600 -0.414089 -4.228540   2.659440 -3.243120  -6.26291
## 6  3.161660  7.8113200 21.747800  2.844890 -10.529600 -1.050340 -10.94520
##         PC82      PC83      PC84     PC85      PC86     PC87      PC88
## 1   3.676170 -2.114840  3.300130  9.77398 -0.501383  4.24391  2.821420
## 2  -2.625270  5.738870  7.397860  7.17553 -3.859310  4.76020 -4.966830
## 3   1.386140  4.294830 10.229400  1.45990 -6.178910 -2.86423 -3.815870
## 4   6.702490  2.631350 -0.272547  4.98237 -2.535640  2.31096 -3.023310
## 5   0.504438  0.151063 -0.428986  6.41210  4.387500  4.84532 -0.855451
## 6 -10.615300 -3.035320 -1.516010 -3.19946 -3.781570 -2.63837 -1.979470
##        PC89       PC90     PC91      PC92      PC93      PC94      PC95
## 1  0.881157  -1.591860  5.38173  2.853400  2.381060 -1.304460 -5.221110
## 2 -4.828930  -4.081890 -1.09502 -0.126301  3.998150  5.205870  1.319620
## 3 -6.985200  -5.026990 -4.65174 -5.034190  0.994824 12.556600  4.545030
## 4 -5.524060   0.811997  1.02417 -6.209200 -0.934143  3.319320 -1.919300
## 5 -1.640050   1.103730  3.49897 -2.529840  2.613670  3.429880 -5.707880
## 6 -9.966890 -12.861300  5.42910  0.589571  2.157570  0.951197 -0.826276
##       PC96      PC97       PC98       PC99     PC100     PC101    PC102
## 1  1.82553  2.370550  1.8149700 -0.9901620 -2.541920 -0.679209 -3.58714
## 2 -2.39237  1.638400  4.4738900  2.6497300  1.116190 -1.692700  1.95229
## 3 -5.22245 10.461600 -0.8455530  0.0263679  0.505576 -7.371870  1.94227
## 4  3.89914  4.791710  1.3550500  1.6973700 -1.274580  5.993940 -5.88560
## 5 -2.82533  0.932514  5.2839700  7.5794400  3.857620  1.928860  1.70852
## 6 -4.32066 -0.115444  0.0519455 -3.6473500  0.416684  2.038670  3.52975
##       PC103    PC104     PC105     PC106      PC107    PC108     PC109    PC110
## 1 -1.242230  1.37588 -0.282432  1.009850  4.5056900 3.105340 -3.378790  3.24386
## 2 -0.388432 -4.76404  0.346802 -2.296200 -5.0945900 0.967458  4.496360 -1.54846
## 3  5.974930  4.55647 -1.570030 -0.563826 -0.1233150 2.944420 -1.330780 -6.44084
## 4  0.486571 -5.57529  1.298180 -5.440210  0.4837610 3.749750  0.621117 -6.45818
## 5  5.042380 -3.03734 12.192400  3.364350  0.0474162 1.490450 -3.737110  6.32788
## 6 -4.821870 -4.92392 -1.975290  2.803510  6.6819200 4.907960  3.215450 -5.80926
##       PC111     PC112     PC113    PC114    PC115     PC116     PC117
## 1  1.831410  -3.56423 -0.829310 -6.09407 -1.41875  3.763020 -1.236830
## 2 -0.123996   2.05858  2.182990 -5.22162 -2.11596 -0.160551  1.039900
## 3 -2.496910  -2.32711  5.238510 -2.03523 -2.69885  7.127050  0.156205
## 4  1.704910  10.35100 -0.742679  7.69977 -1.23606 -3.155350 11.828700
## 5  5.810880   7.21787  3.713050 -4.69280  3.66361 -2.726250 -5.317630
## 6  8.105620 -10.90690 -6.465960 -1.26854 -1.50106 -5.159560  8.907690
##         PC118     PC119     PC120     PC121    PC122     PC123    PC124
## 1 -6.16092000  1.699850  0.186924 -4.391350  3.11780 -0.422223  4.95131
## 2 -2.08942000 -1.546120  3.111010 -0.949337 -1.59902  1.958100  1.83929
## 3 -0.34188400  0.749496  2.910160  1.304050  4.03203 -1.654230  7.92005
## 4 -2.17334000 -4.921240  1.410030 -7.839490  4.07305 -2.265450 -8.38001
## 5  0.00684361 -3.118290  5.235230  6.032060  7.83041 -1.207320 -0.43200
## 6  4.07670000  5.702270 -3.817560  6.174360 -4.08069 -2.418970 -0.97293
##        PC125     PC126     PC127    PC128    PC129      PC130     PC131
## 1   0.185832  0.261899 -6.282550  5.91919  6.23795  7.0226900  2.535890
## 2  -0.580664  5.848520 -1.924010  8.06666 -2.68879 -1.5571300  0.298089
## 3   7.351420  6.280290 -0.995556  7.52393 -5.38622 -0.0920857 -5.297550
## 4  -5.969310 -6.378260 -6.014520 -4.39762  3.20391  7.1223200  4.967480
## 5 -10.860400 -6.738010  0.320248  3.82667 -1.24171 -3.7832800  5.202910
## 6  -1.569440  5.852960 -0.186841  3.44772 -1.20364 -0.4914640 -0.200503
##       PC132    PC133      PC134     PC135     PC136     PC137      PC138
## 1 -7.091640 -4.09787 -0.9431710  3.730210 -0.179847 -2.858830   1.118670
## 2 -0.353264 -6.52274  0.0324326  3.545020  0.543437 -0.982884  -3.196480
## 3  1.967810 -6.04081 -2.9640000  5.207930  3.043190 -2.378460  -0.665147
## 4 -2.844440  1.84523  2.1313300  1.505590  6.049240 -3.552260 -10.050800
## 5 -3.937810  9.15740 -4.9289600 -0.265273  0.282665 -2.600760  -0.921349
## 6 -6.911200  1.92793  1.3839200 -0.159293 -4.199880 -3.750490  -2.533260
##       PC139     PC140    PC141     PC142     PC143    PC144     PC145     PC146
## 1  4.263590  6.099980  3.22012 -8.334070  1.299490  7.02878  0.628793  1.854180
## 2 -0.703574  7.821730 -2.22026 -1.010210  1.234940 -3.78188  2.323930 -3.586940
## 3 -2.674500  9.179140 -1.15101 -2.484550 -7.111730 -1.52159  6.152850 -8.405590
## 4 -5.241910  3.592440  5.13412  0.565826 -1.087230 -4.42264  1.112270 -0.866679
## 5 -4.036760  0.360688 -2.64851 -1.479770  3.857360  6.19061 -1.356600  2.374540
## 6  5.005410 -0.217569  3.18881 -5.517830  0.507129 -2.31484  5.017880  1.484370
##       PC147    PC148    PC149      PC150     PC151    PC152     PC153    PC154
## 1 -3.169590  3.90917  5.48849  5.5814600  2.969340 -1.10906  0.375404 -1.01594
## 2  0.941444 -1.69402 -1.08083 -0.8090390  5.699030  1.34852 -4.540270 -2.32031
## 3 -3.784180 -5.59783 -1.90505 -1.8694100  2.849310  5.91475 -3.382540 -2.34757
## 4 -3.553470 -3.55506 -2.91962 -0.0703231  0.377022  1.92900  1.305020 -7.54519
## 5 12.438800  6.40531 -1.03165  3.5773900  4.051250  0.97918  0.427298  6.82716
## 6 -7.824840 -2.85133  2.15418  5.0165100 -2.573520 -3.33185  1.175740  5.80516
##       PC155     PC156     PC157    PC158     PC159    PC160    PC161      PC162
## 1 -3.194590  6.344740 -4.716410  5.56698 -0.229742 -1.12558 -2.78257 -1.1763900
## 2 -2.299270  4.739130 -3.311780 -3.87053 -5.494820 -1.99021  2.77757 -6.3429000
## 3  0.131539 -0.691950  2.945900 -9.90679  0.222635 -4.41852  4.96841 -0.5344060
## 4  6.693760 -0.412623 -1.045450  2.78292  0.401828 -1.29800 -2.12093  0.0229272
## 5  0.645226  6.898780 -0.239179 -1.87662 -7.712930 -8.70063 -7.45579  3.9954700
## 6 -8.188180 -0.502378 -0.606133  8.85862 -6.430770  3.95163  3.67135  4.9203100
##       PC163     PC164     PC165     PC166       PC167      PC168     PC169
## 1  2.407920  0.504277 -6.207460 -1.821530 -1.46783000 -2.2010600  0.284804
## 2  0.160678  9.304710 -2.631630 -5.545960 -0.14424700 -5.9512900  4.902430
## 3  1.266740  0.489838 -7.443820  0.357423 -0.00106332 -4.8939300  0.550162
## 4  1.272300 -9.188160  5.111440 -3.451410 -3.36530000 -1.3892100 -3.437740
## 5 -5.355580  3.933350  0.553361  1.089580  4.15026000 -0.0174637  0.393706
## 6  2.439100  1.310280  3.313470  5.025000  3.10215000  1.9033900  1.265480
##      PC170     PC171     PC172    PC173    PC174     PC175     PC176     PC177
## 1 -2.06408  0.249062  3.206130  2.51127 -1.57129  2.806100  2.328480  0.940548
## 2 -2.31194 -3.141630  0.382582  2.01567  5.37382  0.446184  0.977171  1.229680
## 3 -1.36361 -0.628543 -3.132420  5.85005 -1.13194 -4.795800 -0.509625 -8.482140
## 4  9.70610  0.776337 10.806700 -3.03668  3.68210  2.316360 -1.512890  1.518280
## 5  2.00749 10.847100  4.886560 -2.65903 -5.96778 -5.692680  4.891790 -5.440430
## 6  3.31191 -1.190150  3.899690 -2.94469 -2.81133 -2.711750 -2.510280  3.611460
##         PC178    PC179      PC180     PC181     PC182     PC183     PC184
## 1 -0.00200687 -4.65122  2.3894400 -7.303210 -2.759210  2.754250  5.157660
## 2 -0.74188400  4.72168 -1.3900200 -8.020900 -0.766446  0.408351  3.968530
## 3  1.17209000  4.64376 -4.5449300 -1.867810 -5.384910  7.567070 -7.286060
## 4  8.23867000 -7.06576  2.5939400 -2.129160  1.646700 -3.702110 -0.560026
## 5 -3.79358000  6.30434 -3.0662400  4.405560 -3.986570 -2.528300  3.393280
## 6 -0.15949300  0.36457 -0.0979605 -0.803125 -3.550490  1.204050  5.197190
##        PC185     PC186    PC187     PC188    PC189     PC190    PC191
## 1  -5.122260 -2.875810  1.66973 -2.690040 -2.36216 -7.727470  3.27333
## 2  -5.199090 -1.588930  4.85938  5.565460  2.97993 -1.850640 -1.77663
## 3   3.780100  0.199628  1.42551  0.608429 -1.16493  1.421940 -0.28420
## 4 -11.393300 -8.025690 -3.60762  6.154500 -3.88123 -0.491429  4.93060
## 5  -0.683104  0.265958 -7.64458  0.818755  4.07498 -6.294480 -7.87804
## 6   3.152440  0.569911 -1.26830 -0.904214 -0.70014 -1.357030 -2.88737
##        PC192     PC193    PC194    PC195     PC196    PC197      PC198
## 1 -10.970500  3.395710  2.02413 -1.57175 -3.547260  1.39563 -3.6125500
## 2  -1.240410  2.429520 -3.98737 -2.97395  1.090970 -1.02960 -0.0936227
## 3   2.198030  1.188300  1.26478  4.66170  0.524950 -2.12740  6.8943600
## 4  -0.355432 -0.614295  3.02340 -1.12641 -7.986670 -2.69594 -2.7320400
## 5   2.761850  3.336300  1.12680 -3.44725  0.689305 -1.93704  1.8109300
## 6   1.778270  0.410245 -0.27013  1.71417 -1.849740 -4.71803  0.0304301
##        PC199      PC200    PC201     PC202     PC203    PC204     PC205
## 1  4.3506500 -0.1842840 -2.35063  2.435380  0.592889 -1.75159 -3.861130
## 2 -3.7102100  1.1534600  2.31505  3.295890  4.785610 -6.55696 -4.182160
## 3 -1.8342000 -0.0591787  1.43649 -1.125470 -1.176830 -5.18465  0.556054
## 4  5.9963000  4.2321100  1.46582  0.689867 -4.254750  6.18998  4.144820
## 5 -6.4662400  3.6972300 -3.45097 -5.285000 -5.222300 -5.51265  2.109890
## 6  0.0379097  1.2963200 -5.46168 -2.748510  7.776450  4.47502  4.427750
##      PC206    PC207     PC208     PC209     PC210     PC211    PC212     PC213
## 1  1.29632  6.80115  3.724990 -5.679960  7.245660  1.430160 -3.92452  2.438540
## 2 -3.83494  1.33549 -3.521190 -1.570180  6.505040 -0.768251 -1.45241  2.828640
## 3 -5.16227  1.28890  1.088420 -2.014510 -1.789410  6.007320 -1.91646 -0.565721
## 4  0.99046 -1.30097 -2.315050 -5.949760  6.741770 -2.065300  2.48424 -1.596290
## 5 -1.61388  2.94504 -0.124489 -0.257321 11.537800  0.399082 -6.19094 -6.520800
## 6  1.93542 -4.99764  4.708590 -3.235450  0.620947  0.889299 -2.53503  0.928740
##       PC214     PC215    PC216     PC217    PC218    PC219     PC220     PC221
## 1  4.028970 -4.886490  2.92810  5.481230 -3.43628 -8.39159  0.176600 -0.198451
## 2 -0.305079 -1.914920 -0.79662 -1.815350 -1.09437 -2.52244 -3.347810 -1.306930
## 3 -2.319340  1.040040 -1.62346 -3.834280  0.84389  1.46661  1.267470  1.828480
## 4 -0.255899  0.965714  7.46661  3.159170  5.80024  4.63414 -6.449860  1.305460
## 5  0.503550  7.551410 -2.29599  5.001530  4.75892  8.14829  4.804990 -2.737450
## 6 -3.560970 -0.402145 -2.13385  0.128978 -2.61625  4.07387  0.670273  0.929313
##       PC222     PC223     PC224    PC225    PC226      PC227       PC228
## 1  3.137110 -5.921780 -3.046360 -1.67911  1.24058 -7.4721300 -0.48883500
## 2  5.360180  4.655510 -0.385307 -1.37559 -1.85719 -5.6091900 -0.26706700
## 3 -2.584560 -0.296809  5.357860  4.38550 -2.74253 -1.7924800  0.00585376
## 4 -0.462546  5.945220  1.681390  2.55162 -2.71198 -0.0619051  0.59627000
## 5  5.466080 -3.546580  8.015470  5.28112  1.35408 -0.1151420 -0.39073600
## 6 -3.499260  1.654170 -0.164668  4.23481 -1.47545  3.8106900 -0.67177800
##       PC229     PC230    PC231    PC232     PC233     PC234     PC235     PC236
## 1 -0.484416 -6.591900  7.10378  1.20679 -0.953984  1.795180 -3.067130 -0.185464
## 2  5.944490  0.946733 -3.46878 -1.51164  0.568447  2.588420 -0.633609  6.386770
## 3  0.472092  2.519760  1.65108 -1.18434 -1.810870  2.063490 -0.997746 -1.331900
## 4  5.414530 -4.616250 -5.22295  3.60925 -1.318590 -5.395360  0.213695 -0.929498
## 5  1.747560 -3.011750 -3.00410  0.82996  0.798574  0.862193  2.458870 -4.114840
## 6 -1.438900  4.061510 -1.97774  6.04377  2.038440 -1.225040 -0.970757  2.346040
##       PC237     PC238     PC239      PC240    PC241     PC242    PC243
## 1  -2.60627  1.724900 -3.233350 -1.8882900 -2.71458 -1.530650 -1.79883
## 2  -1.67193 -5.356220 -3.979290 -4.1170300 -2.96122  5.222450  4.06640
## 3   2.33019 -0.447266 -3.185630  0.0358097  8.08531  0.866774 -1.22115
## 4 -11.29090  6.735320  5.308680 -0.9669070 -3.06053  1.807940  4.24437
## 5   1.17821  3.636750  0.234708 -6.4126800 -5.04591 -1.722030  6.87473
## 6  -1.91458 -5.204140  2.415640 -1.8898000 -2.21064 -2.173030 -1.47902
##       PC244     PC245    PC246     PC247      PC248    PC249    PC250     PC251
## 1 -0.141661 -3.046470 -5.59169  2.374930  3.7825000  7.28198 -1.39082 -2.122820
## 2 -2.937560 -3.264400  1.04026  1.934550 -4.5539700 -3.65782 -2.21004  1.590630
## 3 -0.158989  0.424500  2.31887 -0.986673 -0.6495400 -4.43763  1.94750 -0.639691
## 4 -0.387815 -4.758520 -5.30704 -0.800842 -3.1182100 -1.80576 -4.37313 -1.730690
## 5 -0.443691  2.042780 -4.16313 -2.437480 -1.8216800  1.89389 -3.22134  0.903353
## 6  2.181070 -0.869642  1.58744  3.757840  0.0782487 -2.07095 -5.05006  1.444780
##        PC252      PC253     PC254     PC255     PC256     PC257     PC258
## 1 -3.1109100 -0.0989007 -3.984490 -0.380065  0.773690  5.081620  3.796560
## 2 -0.0900249 -3.1492800  0.945347 -0.316746  2.787110  4.558370 -0.148475
## 3  1.8735200  5.0584200 -0.611032 -1.920500  3.709800 -0.484981 -5.442160
## 4 -4.2312100 -7.2820700  0.406724 -0.858350  4.568340 -1.386100  2.992070
## 5  8.4939100  4.2626500 -0.487946 -0.449387 -1.182860 -0.322571  6.044300
## 6 -2.1517800  0.5775150 -0.378945  2.891990  0.450424  0.249205  0.817690
##       PC259     PC260     PC261     PC262    PC263    PC264     PC265     PC266
## 1  4.554870 -0.180526  3.435920  0.225784  1.60653  3.76099  2.871990 -1.302680
## 2  1.288960 -3.907130 -1.382680  2.555630 -6.02580  1.71128  4.870660  7.403940
## 3  4.021820 -2.085190  4.568090 -1.944590 -2.48828 -3.21620  1.575480 -0.828938
## 4 -0.230136  3.909410 -3.996590  1.347380  3.39100 -2.32004  3.366520 -3.100750
## 5 -2.199270 -6.744820 -6.012390 -0.999186  3.85779  3.65402 -5.079180  0.360558
## 6  0.529356 -3.363430 -0.787424  0.286810 -1.05157 -1.30635 -0.712065  1.261690
##       PC267     PC268      PC269     PC270     PC271    PC272     PC273
## 1 -0.704708  4.322350 -1.1258300 -1.179440  0.675343  3.01902  1.970360
## 2  5.160620  3.635520 -2.4468800 -0.493435  3.050280 -1.56081 -8.733250
## 3 -2.105030 -0.509731 -0.0109466 -1.482940 -2.779740 -6.08347  1.363990
## 4  4.917970  4.606890  3.2853500 -1.722580 -1.324390  1.72342  5.424210
## 5 -2.559020  1.384370 -2.3572400  1.784000  3.650560 -2.38102 -2.517820
## 6  2.473910  0.508406  2.7605000 -3.296760  0.279406  1.45649  0.701155
##       PC274     PC275      PC276     PC277      PC278      PC279     PC280
## 1  1.076720 -6.456570 -4.3610100  2.232230  3.7653100  2.5232200 -2.577250
## 2 -2.348860  1.316240 -3.4896600  1.010140  3.5050800  1.3732900  1.907470
## 3  1.627800 -2.196950  5.0531400  3.953960  0.1296010  0.7704000 -1.831710
## 4  0.922302  2.941030  0.0113377 -2.077920  0.0954494  4.3804500  6.568490
## 5 -1.010390  0.577939  1.6901900 -1.417940 -0.2320480  2.0787400  3.394350
## 6  1.942790 -0.784017  1.2404900  0.799635 -1.3125100 -0.0911067  0.593806
##       PC281    PC282     PC283     PC284    PC285     PC286     PC287     PC288
## 1 -1.167470 -7.32710 -1.757210  6.263120  6.68303 -2.738220 -1.282220  9.083170
## 2  6.050040  5.06460  1.867210  2.773310  3.04143  1.122050  2.125570  0.786617
## 3 -3.118240  2.85255 -1.304560 -1.839270 -3.27678  2.887690  4.543650 -4.966050
## 4 -0.755247 -2.41956 -0.177232 -0.785872  2.57769 -4.062370 -2.953830  0.331381
## 5 -0.167794 -5.00791 -0.104553 -1.051420  1.41791 -0.746397 -1.887730  0.908154
## 6 -0.419569 -2.21488  2.496790 -1.429140 -1.06806 -1.826130  0.880564  2.172490
##      PC289    PC290     PC291     PC292     PC293     PC294     PC295
## 1 -2.24602  3.25972  3.101570  0.906306  3.789420 -0.418502 -0.835618
## 2 -1.97198  3.50750  0.107420 -1.740950 -1.272470 -2.388470  0.110258
## 3  3.27843 -6.82619  1.227160 -0.963813 -0.240709  0.520258  5.565670
## 4  4.31911 -2.41201 -0.813299 -2.981570  4.501630  2.012100 -0.720453
## 5 -2.20043  7.65197 -0.123663  0.949846  5.226230 -1.838180  5.166070
## 6 -1.55086  1.81220  2.017660 -0.907038  1.868770 -2.701710  3.248320
##        PC296     PC297     PC298     PC299     PC300      PC301     PC302
## 1  4.8994000 -0.354399 -2.316600  1.198970  7.127490 -6.4063800 -9.698180
## 2  4.2534900 -1.765790  4.649910 -0.824233 -6.574470 -5.6218900  3.017050
## 3  1.0221600  1.172740 -0.620871  1.926090  1.182940  1.2398100 -1.562080
## 4  3.5535700  2.434290  2.813040 -0.250536 -2.751430  0.4080740  4.008350
## 5 -3.9412800  7.063680  1.152600  1.067250  0.957400  6.0361900 -5.040300
## 6  0.0479087 -0.334840  1.281160 -1.611780  0.510062  0.0803322  0.585129
##       PC303      PC304     PC305     PC306      PC307     PC308     PC309
## 1 -1.244670 -0.0359111  3.494620  5.136460 -1.0652500 -5.897530 -2.196240
## 2  0.880336 -0.2842230 -5.833970  3.408550  2.8466200 -0.746355  3.100230
## 3 -3.920340 -2.6214400  5.745940  0.144089 -0.0390333  2.352300  0.797683
## 4 -3.766600  2.7632300 -2.127930 -1.375800  6.4100900 -4.232740  5.516660
## 5  2.002890  6.0349300 -2.486370  2.179430  9.5686900 -1.446080 -2.554080
## 6  2.285350 -0.0504606 -0.860408  1.275890 -0.3145560  0.263111  3.049160
##       PC310     PC311     PC312    PC313     PC314     PC315     PC316
## 1 -4.529300 -0.596955 -6.022760 -1.98718 -3.854230 -4.458500  4.820600
## 2 -1.279880 -2.394470 -2.733360  2.29759  6.911940  2.783710  3.275130
## 3  1.875160 -0.843038 -3.966130 -3.25928 -0.861628 -1.571350  4.746350
## 4 -1.148870 -1.579430  2.904310  2.57539 -1.109820 -0.791526 -0.364149
## 5 -5.018320  1.121770  7.693330 -1.24061  0.587698 -0.272952 -1.099740
## 6  0.552704  1.666260  0.578342 -1.21995  1.913870  1.801810 -0.304175
##       PC317     PC318     PC319     PC320    PC321     PC322     PC323
## 1  2.317450 -3.031970 -3.967150 -3.718840 -1.18691  3.754420 -2.247830
## 2  6.108240  0.961631 -1.638030  1.563570  2.16924 -3.449600  4.716840
## 3  1.737610  0.195350  4.031240 -1.256110 -4.96456 -0.360506  0.182904
## 4 -1.039470  0.732611 -1.700800  1.173080  4.33882  3.042740 -2.074310
## 5 -2.445130 -2.092750 -3.781580  7.753250  1.51262 -1.001810  0.216068
## 6  0.484572  0.124960  0.254165 -0.590667 -1.23423  1.284490  1.421910
##       PC324    PC325     PC326     PC327     PC328     PC329    PC330     PC331
## 1 -2.407790  3.52872  3.019310 -5.046920  1.570320  3.542260 -4.17224 -0.764133
## 2  2.885370  1.60306  5.788690 -0.411278 -5.938110 -3.153700  1.52610  8.171960
## 3  3.217630 -2.05684  2.295780  2.350170  7.088650 -2.384220 -6.56339  3.529360
## 4  3.601730  3.30243  0.271778 -0.458454 -0.321689 -1.276820 10.04940 -5.558870
## 5 -1.756110  7.21950 -7.370730  0.722412 -1.418190  4.488210  5.81059 -1.090410
## 6 -0.140333 -2.03585 -0.620517  0.324934  2.739080 -0.184163 -1.30837 -1.856670
##       PC332      PC333     PC334     PC335     PC336     PC337     PC338
## 1  2.403320 -3.4056100  3.337870  4.087050 -0.919898  1.374020 -0.956720
## 2  0.186044 -2.6618000 -0.863452 -0.624972  2.173400  1.361700 -3.670690
## 3 -2.177060 -0.5525700 -3.144710  2.214470 -2.243890 -1.964570  3.324350
## 4  2.962830  1.4213600 -1.762620  1.362520  4.203550 -1.478030 -0.558561
## 5  0.588064  6.0416500  5.217510 -3.452600  1.496190  0.221104 -5.046460
## 6 -0.592151 -0.0223541 -1.813180  1.348970 -0.975891 -1.191660 -2.384790
##       PC339       PC340    PC341      PC342     PC343     PC344     PC345
## 1 -0.880226 -1.21509000  3.04682 -2.4142100 -5.935350 -1.983460 -4.654590
## 2 -0.605932 -0.00555504  3.36160 -2.1675600  1.394030 -1.566820  2.968850
## 3  4.231470 -4.78404000  3.28742 -0.0250219  0.753464 -4.882620 -1.118300
## 4 -2.447590 -4.44271000 -1.14934  8.4456400  2.939360 -0.727864 -1.694790
## 5 -2.852770 -1.32246000  1.09861 -0.1515270  6.350420  7.626440  0.222928
## 6  1.691910  2.79154000 -3.56493  4.8764800  1.760920  1.908600  0.218869
##       PC346    PC347     PC348     PC349       PC350     PC351    PC352
## 1 -0.580142 -5.47046 -2.364440  1.107250  4.27404000 -0.412252  3.67231
## 2  1.987670  4.96409 11.246600 -1.562750  5.19936000 -0.521341 -2.80807
## 3 -4.398510 -1.68173 -0.847626  0.085437  0.46251200 -2.057560  2.52434
## 4  0.857038  2.08038  1.765370 -1.220690 -1.86166000  3.361190  1.92769
## 5 -2.248230 -3.16584 -7.307590 -0.262214 -5.46398000  1.412240  1.14520
## 6  2.221290  2.57069  1.317720  0.953769 -0.00655931  0.540784  1.76074
##       PC353     PC354     PC355     PC356    PC357     PC358     PC359
## 1  1.871810 -6.511980 -0.661564  3.378200 -4.47432  0.941510 -2.664150
## 2  0.063551 -2.677530 -1.001650 -4.270190 -2.68414 -0.182748  0.708255
## 3 -1.812180  6.216740  3.139380  4.413630 -1.97647  5.228330  2.181090
## 4 -0.700273 -5.511250 -4.511440  1.528640 -2.73488  0.625332 -3.168350
## 5 -4.372240 -0.261217  0.172529  4.927270  1.26310 -3.894150  3.667970
## 6 -1.519310  4.241000 -1.619820  0.725635 -1.29752 -0.255093 -0.706519
##       PC360     PC361     PC362     PC363     PC364      PC365     PC366
## 1  2.231740  4.099790 -8.543030 -1.588750 -1.157290 -3.5193500 -2.105260
## 2  1.397130  1.526930 -5.251060  5.153830 -5.638690 -4.5892300  7.809180
## 3 -5.802150 -0.321080  1.538390 -1.007440 -0.692839  2.2761400  3.638130
## 4 -0.785163  0.439312 -4.591540  6.905630  2.763150 -3.7990900 -4.649150
## 5  3.697240  1.214710 -0.744555 -0.815993  5.262620  2.7329000 -5.562210
## 6 -0.707137 -1.655810  0.165163  1.824010  0.922399  0.0216574  0.838715
##        PC367     PC368     PC369     PC370     PC371     PC372    PC373
## 1 -1.6720900  4.487680 -4.314740 -3.528740  2.225810 -4.667440 1.028090
## 2  0.1706070  0.427944 -8.202190  1.707700 -0.317252  2.776320 4.323410
## 3  1.1027500  1.074630  0.550941 -1.084960  0.709983 -1.953010 2.460610
## 4 -1.0847500 -0.633538  2.947630  1.980230 -0.123283  3.328190 3.106650
## 5  4.8595200 -0.519760 -2.005060  3.654230  2.527530  0.186395 2.013940
## 6 -0.0637026  0.906861 -0.779790 -0.610078 -2.564690 -0.551038 0.510095
##      PC374     PC375     PC376      PC377     PC378     PC379     PC380
## 1  7.02319  1.738220  6.596490 -0.8094320 -2.618450 -3.024410 -1.680290
## 2 -7.89084 -0.738554 -1.583800 -1.5258300 -3.943280 -1.357440 -6.709730
## 3  4.21677 -2.482110 -4.478720 -0.0608105  0.197405  0.299719  1.448730
## 4 -3.33134  2.823940 -4.081030 -2.2697700 -3.363470 -1.840450  4.161030
## 5  2.88541  0.485885  0.580940 -1.7494500  6.343700  2.654310  4.209810
## 6 -5.10864  1.730910  0.818387 -0.3118950 -0.834581 -0.560287  0.823605
##       PC381    PC382     PC383     PC384     PC385     PC386     PC387
## 1  1.224140 -4.46823  4.560820  0.336982  3.222050 -0.128295 -3.128460
## 2  2.085980 -2.11413 -3.411630  6.556230  3.376970  0.324062  2.453470
## 3  4.953880  2.79445  2.917030 -2.244600 -0.713586 -0.365991  0.134078
## 4  0.742917 -3.21043 -0.491023  5.074570  0.285338  2.363070 -4.858370
## 5 -8.116330  4.33748  1.892020 -4.607370 -0.826007  2.118930 -1.398930
## 6  1.033700 -1.65589 -1.777830 -3.430340 -0.767556  1.481900  2.696470
##       PC388     PC389     PC390      PC391    PC392     PC393     PC394
## 1  0.939928  0.933622  0.669828  2.5207300  2.17117 -2.860700  6.196000
## 2 -0.037260  1.490700  0.419819  0.1439770 -4.44610 -2.577600 -6.390070
## 3 -3.199150 -5.518490  3.181760 -2.8136800  1.66992  2.044430 -0.187200
## 4  1.497960 -0.326636 -3.596580  2.3397100  3.57688  1.227180  0.812500
## 5  6.972460 -1.416870 -3.069460 -2.8932200 -5.67699 -4.580810  2.630930
## 6  0.110215 -1.793140  0.266512  0.0441029  2.89785 -0.887878 -0.778045
##       PC395     PC396     PC397     PC398     PC399    PC400      PC401
## 1 -2.982830 -3.082900 -1.915160  0.805651  0.620796  4.94050  2.0283400
## 2 -3.178690  3.264820 -0.390678  5.573150 -1.369010  2.10913 -1.7665300
## 3  4.499490 -1.544360 -0.836570  4.431710 -5.337080 -4.37793  0.2483010
## 4  5.035120  1.209630  0.238963 -3.609800  5.509770 -1.35605  0.7613310
## 5 -0.591322 -2.202240 -2.334450 -0.538381  0.351640 -9.57576 -0.0717029
## 6  0.426598 -0.399882  2.091850  2.806660 -0.307129  1.51197  0.2879930
##       PC402    PC403     PC404    PC405     PC406    PC407     PC408     PC409
## 1 -4.114920 -1.97979 -0.808980 -1.91079  4.511750 -3.82357 -0.799932 -0.149239
## 2 -0.183011  2.31346  3.158090 -6.78478  2.630460 -6.62612 -7.284580 -1.642940
## 3  1.537450  5.09322 -0.578961  5.01701 -6.217660  2.83876 -1.389460 -3.389160
## 4 -2.623490  2.60056  0.689956  5.17085 -0.133193 -1.47182 -2.351500  2.767990
## 5 -2.331520 -6.66658  2.385440  8.46534 -7.742310  2.81319  3.648790 -2.183510
## 6  2.455320  1.00331  4.438930  1.72310  0.462323 -1.11461 -1.513510  0.549190
##       PC410     PC411     PC412     PC413     PC414    PC415     PC416
## 1  1.180210  7.201860  5.997450  0.796744  6.778560 -6.71034  3.086720
## 2 -2.246620  3.939280  1.488150 -3.055610 -2.258990  6.62703 -2.355380
## 3  1.748630 -0.637145  1.767430 -1.559500  5.161070 -2.28945 -2.964570
## 4 -0.186485  2.244710 -4.674540  4.427060  3.618180  3.84153  2.612780
## 5 -6.562460  3.455780 -0.511899 -1.848520  0.333890  0.88684 -8.437590
## 6  1.467220  1.914170 -0.209509 -3.444480  0.877921 -1.44130 -0.203141
##       PC417     PC418     PC419      PC420     PC421     PC422     PC423
## 1 -5.270000  2.669880 -2.737900  0.0972643 -1.743620 -1.627760  5.633320
## 2  5.311830 -1.166070 -5.028760  2.5614900  0.544232  5.968320 -0.237397
## 3 -1.070970  6.167360 -1.685960  0.2800240 -0.776307 -2.840230  3.627820
## 4 -1.219700 -7.430310  1.134160  5.7804800  0.770270 -1.164310 -5.490850
## 5  0.311861  2.239030  2.096780 -3.4352900  5.097890 -3.305730  1.374780
## 6 -0.633924 -0.160888 -0.692334 -1.4510800 -0.791833 -0.747096 -3.763220
##       PC424     PC425     PC426       PC427     PC428     PC429     PC430
## 1 -1.059930  1.189740  7.897540 -1.28273000  3.842900 -2.966280  0.098208
## 2  4.908080 -7.822090 -3.670350  0.00254577 -5.044020 -1.718650  2.294610
## 3 -4.824760  0.765343 -2.504620  2.69992000  4.290130 -2.310330 -3.080490
## 4  0.409329  3.696870 -5.035110 -9.54874000 -4.796550 -3.665010  4.221900
## 5  0.175582 -3.201250 -0.861944  3.02739000  0.143382 -1.756010  5.042660
## 6 -1.932680  2.291090  0.951139  1.05760000 -1.540200  0.810951  2.420680
##       PC431     PC432     PC433     PC434      PC435     PC436     PC437
## 1 -6.697050  1.685660 -0.243431 -2.448690   3.431410  4.924440 -0.557961
## 2  2.249520  1.899320 -0.886325 -2.881820   4.404680  3.758520  2.652000
## 3  0.561962 -0.963899 -3.753970  0.255719   4.539240 -1.637550  1.301970
## 4 -7.683000 -5.204160  2.251000  2.005980 -11.448100 -4.724820 -3.365900
## 5  0.568323  1.611650  4.376980  1.930420  -7.120900 -3.577540  0.190141
## 6  2.721070 -0.976344 -0.615873  0.295888  -0.452162 -0.825643  0.570102
##       PC438     PC439     PC440     PC441    PC442     PC443      PC444
## 1  2.027550 -1.307050  6.075990  2.392040 -5.98478  3.875640 -1.5105900
## 2  1.606860  0.106339  1.904550  1.165110 -3.41790  5.343510 -2.1010700
## 3 -0.804997 -0.222015  0.398814 -2.446710 -3.29946  0.474361 -0.1870860
## 4  1.322950  2.981560 -0.178201  0.445028 -3.96408  1.481280 -6.1553900
## 5  1.096330  1.982040 -3.747420  2.849590  1.18340 -2.205130  6.1859400
## 6 -1.661220  0.460001 -4.163780  1.734380 -1.60864  1.425830  0.0177583
##        PC445     PC446     PC447      PC448      PC449     PC450       PC451
## 1  0.0543639 -3.483880 -1.758580  5.8307800 -12.800700  8.665420  2.57629000
## 2  0.4838180  0.842091 -1.087880  8.0486200   4.420650 -5.093200  0.72606300
## 3  4.9778000  3.322760 -4.199770  3.2852100   5.431240 -0.278039  3.61754000
## 4  1.0217200 -2.943360 -1.568410  3.5185400   3.588530  1.165150  1.37150000
## 5 -0.2619590  2.436830 -2.134500 -6.2773600  -0.162487 -7.516650  0.00819288
## 6 -0.5146880  2.233280 -0.798793 -0.0443748   0.574122 -2.278650 -2.66079000
##       PC452     PC453     PC454     PC455     PC456      PC457     PC458
## 1 -2.663170  1.336180 -3.876230  0.128933  1.647270 -2.4688800 -0.747510
## 2  0.464246 -0.880251  4.770560  1.805550  9.946890 -0.2074640  0.724752
## 3 -3.159810  1.534980 -1.749210 -2.494610 -1.520250  0.9077070 -4.305340
## 4  0.709957  5.805050  1.884610  1.677390 -0.649724  0.5085680 -2.370760
## 5  2.752160 -0.550547 -5.337650  0.399748 -5.600140  0.0219237  4.681990
## 6  1.325480  0.154809 -0.385594 -0.776716 -0.788787  2.1430000 -1.536190
##       PC459      PC460     PC461    PC462     PC463      PC464    PC465
## 1  2.270570 -5.4933200  6.461500 -3.33142  3.142090 -12.198700  3.91248
## 2  2.549500  4.7613800 -3.256860  4.54163  2.767550   1.899950 -3.89990
## 3 -1.951570  0.0337128  6.954090  5.37897  0.262643  -2.775460  8.30044
## 4  0.219423 -1.0728000  6.479310  4.85522 -2.139290  -0.349628  4.27176
## 5 10.069900  2.1485000  0.896402 -4.57293  2.335340  -2.821520 -4.37676
## 6 -0.524228 -0.5470630  2.378640  2.67134 -0.211221   1.420820  1.73797
##       PC466      PC467      PC468     PC469      PC470     PC471    PC472
## 1  3.628020 -4.0244400 -8.1711400  0.310744 -0.8074060  2.912020 -2.53329
## 2  0.418647 10.4508000 -1.0979000 -1.964110 -0.8672260 -4.281280  0.76241
## 3  6.141740 -5.1453900  8.6116400 -5.823570  3.9905600  0.883324  2.46051
## 4 -2.623500 -6.0081400  2.5242500  7.108670 -3.1207300 -4.661080 -1.30211
## 5 -1.183000  0.0101678 -0.0661312 -1.290870  5.4439800 -6.449400  2.27396
## 6  1.824960 -1.2615100 -0.2402270  1.131610  0.0380535 -0.192693  1.20366
##      PC473      PC474    PC475    PC476     PC477    PC478     PC479     PC480
## 1 1.013180  1.6745700  2.96890 -5.48375 -6.103410 -4.61101 -2.322810  0.412452
## 2 2.673770  0.0147722 -4.50127  2.04992  3.102240  2.99153  7.120000  0.830942
## 3 3.358100 -3.4148800  1.81465  3.03073 -0.987833 -2.96006  0.594713  0.195196
## 4 1.966060 -3.9964500 -2.79979 -1.95266  0.727856  1.33037 -2.553200  5.371020
## 5 4.215990  1.6974900 -2.43417 -1.75712  1.000220  7.04984 -0.121104  0.675001
## 6 0.760275  0.5023750 -1.60718  2.58132 -1.260870 -1.52995  2.888590 -0.886037
##       PC481     PC482      PC483      PC484     PC485     PC486     PC487
## 1  3.554580 -1.461740  0.0914599 -3.2171800  0.655798  3.843490  2.011310
## 2  7.372670 -5.650640  3.4037300 -0.0880227  4.858760  3.624130 -1.305790
## 3 -2.255540  3.775280  1.3882900 -1.7515100 -0.668557 -3.536550 -1.459290
## 4  1.432980  5.346770 -2.6040100  1.6998900 -1.660010  6.209710 -0.313448
## 5  0.450003 -2.554510 -0.2082930 -1.0672200  5.025500  0.353257  0.968105
## 6  3.231290 -0.392056  1.5970900 -0.1756650 -1.125020  1.700380  0.606852
##       PC488    PC489    PC490     PC491     PC492      PC493     PC494
## 1  3.802490 -4.42576 -2.72845  7.621920 -4.325580 -0.3197550  3.667250
## 2 -0.273779  4.25418 -2.62627 -8.714900  2.932970  0.0685562  0.559071
## 3  1.703190  2.56347  1.85937 -0.324487  1.980840  1.2350700 -6.113290
## 4 -1.095370 -2.46304 -2.46801  2.913660 -3.140380  0.4899490 -5.079530
## 5 -2.027000  1.03381  3.38786 -5.432900  1.385940 -2.4002500 -3.183000
## 6 -0.680082  2.57602 -1.26008 -2.528290  0.896173  0.3928170 -1.163320
##       PC495      PC496    PC497     PC498     PC499     PC500     PC501
## 1  2.152860  5.7872700 -6.75241  4.888950 10.529200  0.598650  5.330280
## 2 -0.160883 -2.1484100  4.78026 -5.771790 -3.471150  0.939116 -2.290020
## 3  6.756230 -0.0277912  1.17952  6.194580 -0.993496  0.656219  4.909960
## 4  4.118470 -1.4353600  2.49865  3.763830 -2.638930  1.726980 -4.746170
## 5 -4.630090 -0.3568730 -1.52424  0.184014  7.265410  5.768670 -0.799854
## 6 -1.031990  0.3377750  2.11723  1.698640  2.682820 -1.489850  2.085970
##       PC502    PC503     PC504     PC505     PC506     PC507    PC508     PC509
## 1  1.147590 -6.18349  2.441420  2.589680  0.945649 -0.938806  5.70242 -1.472610
## 2  0.584227 -3.77973 -0.269226  5.746340 -2.132790 -6.135870 -3.78667 -1.808490
## 3 -1.617680  3.19440 -0.131290 -3.426400  2.990390 -1.383630 -1.41255 -1.592690
## 4 -2.249930  6.06621 -5.229590 -3.874520 -0.932163 -4.240970 -1.71753  5.178590
## 5  4.279080 -2.11966 -0.039880  2.324150 -6.198930  0.201677 -1.85836  3.175350
## 6 -0.180856  2.28419  0.209238 -0.384053  0.195751  1.556930  1.70532 -0.573656
##      PC510     PC511    PC512     PC513    PC514    PC515    PC516     PC517
## 1  2.49245 -0.170717 -0.65352 -1.421020 -3.11407 -1.79039 -2.01058  0.322580
## 2 -4.53870  6.674060  5.31818  5.727520 -3.67606 -0.36751 -3.44218 -1.265880
## 3 -1.75669 -0.131597 -2.17060 -2.524800 -2.97285 -2.59630  3.07011  3.589120
## 4  7.61641 -0.945926 -7.50184 -0.244245  1.73440 -6.08824  2.38231  1.054690
## 5  1.51179 -4.933460  1.81896  5.225040  3.30415  3.05736  1.43495 -2.695320
## 6  1.69116 -2.429450  1.22643  0.605065 -0.58304  1.19657  1.36024 -0.223375
##       PC518     PC519     PC520      PC521    PC522    PC523     PC524
## 1 -0.563165 -2.154070 -0.845365 -0.6483500  1.83221  2.88531 -1.725740
## 2  0.301268 -3.229540  0.873406  1.7891300 -5.46300  7.75819  2.325860
## 3 -0.767775 -0.220158 -4.583340 -3.5858600 -1.04207 -5.68721 -0.233143
## 4  1.791160  1.734350 -2.313860  1.5824700  3.74753 -1.16347  0.521510
## 5 -1.746250 -1.497070  0.906723  2.1055100 -2.84490 -2.15630  0.213521
## 6 -1.645620 -0.315717 -1.094820 -0.0148446 -1.29688  2.53103  0.295172
##        PC525     PC526     PC527     PC528      PC529    PC530    PC531
## 1 -2.1186300  1.580550 -0.393534 -3.340450  0.7839050 -4.66696 -3.14469
## 2 -0.0507085  0.426393 -0.383365 -4.232440  2.2558000  3.93817  2.57636
## 3 -1.5023100 -1.351400 -0.664369  0.516294 -0.9451710  3.92352 -1.64968
## 4  0.7228330 -5.733700  7.168400  4.020570 -3.8497600  2.84411  1.62315
## 5 -1.2113900  5.624170 -4.654230  1.694480 -1.3925100  1.42230 -2.14384
## 6  0.0801718 -0.170995  0.501421  0.400535 -0.0388899 -0.12602 -0.49322
##       PC532     PC533     PC534      PC535     PC536     PC537     PC538
## 1  3.405020 -1.023090  6.971360   3.084210  0.583564 -1.490980 -5.154550
## 2 -6.307960  1.694060 -6.513580  -1.381010 -7.136960 -2.431020 -2.209950
## 3  0.736933 -0.884715 -6.036790 -11.649600  7.088760  2.491950  3.449040
## 4 -1.267600 -1.954300 -1.052460   3.190740  1.285960 -0.221289  0.805661
## 5  2.102560 -1.143910  0.266658  -2.888100  2.042110 -0.211161  0.818581
## 6 -0.181220  1.334190 -1.171840   0.729099 -0.210794  0.856780 -0.167919
##       PC539     PC540     PC541      PC542     PC543       PC544     PC545
## 1 -2.207470  0.758238 -3.335490  5.4590000  1.221180  1.63972000  4.427280
## 2 -0.911507 -1.389160  3.149200 -3.8821200 -3.431940 -6.08617000 -4.603970
## 3  3.543010 -1.516680  1.850980 -1.5179400 -0.407301  1.66806000 -7.206650
## 4 -2.818540 -1.144380 -2.341230 -7.4721300 -2.596550  2.33664000 -3.276970
## 5 -1.459910 -2.492320  1.155440  0.0787301  3.107540 -0.00789298  0.538279
## 6  0.139411  0.726248  0.900952  2.1527200 -1.238480  0.32846000  0.776762
##       PC546     PC547     PC548     PC549    PC550      PC551     PC552
## 1 -0.592321 -3.950480  5.398870 -0.259369 1.346980 -3.0258500  2.398070
## 2  0.868614 -0.776384 -3.824660  2.338640 0.631417 -1.0236800  3.124520
## 3  4.242580  2.912100 -2.691830 -5.502290 1.836520 -1.5376700 -7.697860
## 4  0.600199  4.452940 -1.611770 -0.782176 1.684540  0.0141803 -2.459820
## 5 -0.293807  3.026430 -6.600290 -1.351960 0.712053  0.9871010 -0.721727
## 6  2.176100 -0.281714 -0.211135  1.887900 0.134043 -1.3467200 -1.413170
##       PC553      PC554     PC555     PC556       PC557     PC558     PC559
## 1 -0.286456 -0.0663779  3.476500 -1.351740 -1.64359000 -0.778570 -1.306260
## 2  1.589360 -1.7513600 -3.728780 -0.730756 -0.00663453 -0.484378  1.384330
## 3 -7.170830  2.0107000 -9.569970  3.974570  3.56960000  8.585820  1.739970
## 4 -6.107070  1.8487100  2.812830  3.488180  0.66171100 -1.753540 -2.556800
## 5 -0.694254  0.4203220 -6.285140  1.427040  2.70552000  0.324602 -0.267143
## 6 -1.441920 -0.3393800 -0.212959  1.152500 -0.77473400  0.450112 -1.014180
##       PC560      PC561    PC562    PC563     PC564     PC565     PC566
## 1  2.825240  0.0995965  1.57620 -3.60296  0.670159 -3.209690  1.550600
## 2 -2.489070 -4.0515200 -3.70281  6.60162 -2.458920  6.003580 -7.124020
## 3 -1.748750  2.9037700  2.62248 -6.01691  1.100880 -4.439560  5.372880
## 4 -4.708210  2.8882200 -1.32227  2.67467  2.624540  0.743727  2.569070
## 5  0.653901 -0.5110950  1.18163 -2.79985 -1.797680  0.198578 -3.013150
## 6 -1.833120 -1.2206000 -1.99551  1.19657  0.685755  0.234363 -0.634971
##       PC567     PC568     PC569    PC570     PC571    PC572      PC573
## 1  0.989093 -0.459252  0.975044  2.73825  1.134910 -2.30975  0.2222010
## 2 -0.929648 -3.040190  0.822756 -1.69767 -1.093300 -2.08528 -0.0283054
## 3  1.996950  6.582050  0.692111 -2.92630  1.409960  6.58744 -1.6235900
## 4  0.980353 -3.243020  5.102860 -2.73461  0.253183  3.93572 -0.1543540
## 5 -1.986100  4.835600 -1.979180 -2.03141  0.057754 -4.35120 -0.0373671
## 6 -0.800821 -0.265115 -1.446920 -1.63397 -0.565600  2.08011 -0.6737900
##       PC574     PC575     PC576    PC577        PC578     PC579     PC580
## 1  1.188210  0.870558 -2.113010  1.37526  0.000623485  2.240530 -0.459305
## 2 -3.579570  3.242020  0.369332 -3.67347 -2.681540000 -3.973060  0.369044
## 3  0.950265 -0.902675 -3.054820  1.31367  2.586150000  0.692345  1.026450
## 4  2.626630 -0.128662  6.096140  1.26551  0.796442000 -0.817054 -1.224270
## 5 -0.530347 -0.572138 -1.720590  1.32734 -0.445303000  1.693790  0.569703
## 6 -0.331061 -0.566017  0.365110 -0.71432 -0.226785000  0.743208  1.396390
##       PC581     PC582     PC583     PC584     PC585     PC586     PC587
## 1  0.900178 -0.975244  1.351650 -0.764878  0.411443 -3.608420  0.816712
## 2  0.230988 -0.649956 -1.625020  3.628170 -3.325320  1.478650  1.625470
## 3 -3.870950  3.023630  2.915330 -1.153350  2.061790  0.483450 -5.288950
## 4 -0.117613  1.489090  2.486810 -0.658991 -0.150000 -1.361030 -2.080040
## 5  2.659190  2.698140  0.107061  0.738584  0.576776  2.061340 -0.933789
## 6 -0.741153 -0.478477 -1.091250  0.432916  0.536083 -0.676711 -0.147207
##        PC588     PC589     PC590     PC591      PC592      PC593     PC594
## 1  0.0384876 -2.219890 -0.408084 -1.612930 -1.4117400 -0.2201720  1.179640
## 2  2.5240200  1.151330 -0.354449 -0.381923 -0.5873530  0.6151750  1.015280
## 3 -7.6461100  0.354336  2.357310  5.889990  0.2842930  1.6530400 -0.253811
## 4 -1.0604400 -0.817802  2.861370  4.752060 -1.3637300 -0.4553330  0.113574
## 5 -0.3504120 -1.044770 -0.397141  0.717417 -1.2142500  0.0818215  0.682315
## 6 -0.7484500  0.578017  1.178260  1.426280 -0.0826511 -1.7031100 -0.406109
##        PC595     PC596     PC597      PC598     PC599      PC600      PC601
## 1 -0.0430501 -0.664985 -0.157422  0.0947256  2.419170  0.6925310  0.0561172
## 2 -2.9241400  1.393060  1.208010 -0.6697120 -1.009420  0.6709490 -1.2857100
## 3  3.9703000  2.300020 -3.190190  1.5798000  0.306442 -3.5776800 -0.9908110
## 4 -0.7069660 -2.730620 -1.374650  0.4564930 -1.200290 -1.1788900  1.8492300
## 5  1.2877000 -2.035230 -1.534000 -0.2660950  1.271710 -0.0783648 -3.0542800
## 6  2.1880700  1.462530 -1.806420  0.0631933 -0.662056  1.1882700 -0.9711610
##       PC602      PC603      PC604      PC605     PC606     PC607      PC608
## 1 -0.422698  1.2217300  1.1282100 -0.0968164  0.883596 -1.851110  0.2777940
## 2  2.119190 -0.2993870  1.1771100  3.2232000  1.750800 -0.367609 -0.0249427
## 3 -4.576820 -0.8257750  1.2293200 -3.1137200 -0.268914  3.724350 -1.6254500
## 4  0.316105 -0.5618170 -0.1933880 -0.6756260  0.588500  1.240880  0.6906870
## 5 -2.893920 -0.8409080  0.0943487 -1.0693300  0.974841 -0.101900 -1.5808500
## 6  0.217015 -0.0314555  1.2089000 -0.0546657 -0.414050 -1.478620  0.2828550
##        PC609     PC610      PC611      PC612      PC613     PC614      PC615
## 1  0.9498830  0.482229  0.0376125  0.1797420  1.8884100 -1.172590  0.0356375
## 2  0.1552650 -0.364791 -1.1793100 -0.9907910 -0.4088200  0.901477  0.1978340
## 3 -1.4347400  2.588180  2.9292400 -0.8670590 -0.0932232  1.349220 -1.0828100
## 4  0.0549793 -0.170540 -1.0536100  0.1080440 -2.1936100  3.458960  1.8271800
## 5 -0.8816780  0.193019  0.1477220  0.0441914  1.2076800  0.291053 -0.6286800
## 6 -1.0314900  1.159970 -0.5925440  0.4074700 -0.9531180 -0.382572 -0.0261295
##        PC616      PC617     PC618     PC619     PC620     PC621     PC622
## 1 -1.2742200 -1.0166500  1.017700  0.413055 -0.615057  0.933675  0.561719
## 2  0.0792599 -0.2762350  0.377416 -1.354380 -1.201900 -0.260239 -0.613003
## 3  0.9567240  1.8708200  1.603800  2.505120  0.409465 -1.091240 -1.182830
## 4  0.7979200  1.3687000 -2.398750  2.090690 -1.684470  0.237746  1.055300
## 5  0.0248111  0.7346960 -0.984834 -0.284761  0.104881 -0.605211  1.480840
## 6  1.2182000 -0.0674913 -0.194666 -0.839569 -0.573405  0.038825  0.137123
##        PC623     PC624      PC625     PC626     PC627     PC628      PC629
## 1  0.1222350 -1.655880 -1.0240800 -0.398773 -0.372284  0.479921 -0.0538361
## 2  0.7523290  1.770930  0.9077600 -0.342363 -1.178260 -1.118970 -0.8612290
## 3  0.4812560  0.754075 -1.5656300  0.225430  0.780495  0.447120 -0.1822280
## 4  1.0049600 -0.483412  1.6803900 -0.427996  0.511488  0.817202 -0.3105760
## 5  0.9491250  0.715926 -0.3017880  1.305980  0.541594  1.282800  0.2009910
## 6 -0.0150364 -1.275430 -0.0555473 -0.887926  0.135793  0.881540  1.3025200
##        PC630     PC631     PC632      PC633      PC634      PC635      PC636
## 1 -0.6730140  1.329650  0.654511 -0.4903520 -0.3914520 -0.3719210 -0.9230830
## 2  0.0548119 -1.407510 -0.869820  1.1010700 -1.0294800  0.5386230  0.5480640
## 3  0.5820030  0.347856  0.200176 -0.3495600  1.3099300 -0.0265796 -0.5817040
## 4 -0.1259030 -1.563200 -0.721731  0.1302550  0.6358970  0.2288740 -1.1053800
## 5  0.5597680  0.776038 -0.203955  1.5193500 -0.0816816 -1.5026400  0.0457431
## 6  0.5599610 -0.573395  0.917824 -0.0270427  1.3210200  0.6686640 -2.0031600
##       PC637      PC638     PC639     PC640     PC641     PC642      PC643
## 1 -0.545903 -0.2611980  0.663981  0.148466 -1.352060  0.534462 -0.0320718
## 2  1.160670 -0.6517060 -1.015110 -0.414513  0.627800 -0.762995  0.5611680
## 3 -0.377351  1.2528800  1.005600 -0.662310  0.233966 -0.474998  0.5785380
## 4 -0.285090 -0.0676641 -0.187437 -2.525190  0.409769 -1.207560 -0.3914100
## 5  1.077120 -0.3668680 -0.679117  0.429045  0.409687  0.160206  0.1937180
## 6 -0.929517 -1.1370400 -1.731310  0.106299 -2.042490  0.202262 -1.0385800
##       PC644      PC645     PC646     PC647      PC648      PC649      PC650
## 1  0.695318 -0.5732730  1.419190 -0.376348  0.7357110 -0.8545110 -0.3704340
## 2  0.425374 -0.7022600 -0.923579  0.105626  0.2854380 -0.2521310  0.9802470
## 3 -0.291940  0.6470010 -0.476133 -0.762751 -1.1635700 -0.0828889 -0.0932712
## 4  1.453290  0.0768286 -1.272050 -0.611429  0.4130000  0.2926600 -0.2423210
## 5  0.215671 -0.0764362  0.256408  0.724395  0.0163036 -1.2054800  0.2619800
## 6 -0.760264 -2.0082000  2.892230  2.303850 -2.2543100 -0.8165350 -0.6895360
##         PC651     PC652     PC653     PC654      PC655     PC656      PC657
## 1 -0.29158400 -0.956123 -0.972026 -0.877605 -1.1494000  0.829165 -0.0530470
## 2 -0.00710967  1.328450 -1.379670  0.160955  0.2285950 -0.466127  0.0432664
## 3  0.50577700 -0.142748  0.781087  0.590896  1.3880000 -0.124747  0.3939730
## 4  0.89601900  0.405259  0.029616  0.808107  0.0401254 -0.356069 -0.0146783
## 5 -0.26726200 -0.468042  0.183892  1.217820  0.2050710 -0.709634 -0.9518690
## 6  1.56279000 -1.084490 -1.147930 -3.845710  3.3993000  3.065770  1.6460700
##         PC658      PC659      PC660      PC661     PC662     PC663      PC664
## 1  0.00773703 -0.5788180  0.4602070  0.0772628 -0.302533 -0.457385 -0.0492801
## 2  0.58495000  0.6189400 -0.1186510  0.5637610 -0.520939  0.256424 -1.5277700
## 3  0.23331200 -0.3534350  0.6300210  0.1764910  0.155248 -0.325883  0.2595370
## 4 -0.17203300 -0.0365733 -0.1475710 -0.2947840  0.147316 -0.172945  0.7324470
## 5 -0.61314100  0.0015057 -0.1592740  0.3222340  0.147858  0.313532  0.1500250
## 6  1.63719000  1.7095000  0.0199371 -3.7314300  8.424690 -6.823410  0.6166360
##        PC665     PC666      PC667     PC668      PC669      PC670       PC671
## 1 -0.0482858 -1.071640   0.307679  0.517415  -0.247526 -0.2724220   0.0570314
## 2  0.4070540  0.978048   0.505680  0.441660   1.310400  0.2838230   0.3236680
## 3  0.1722620  0.860721   0.116949  0.344689   0.648392  0.0017265   0.1882100
## 4  0.0770676 -0.702335  -0.113037 -0.185621  -0.649254  0.1925390   0.0629604
## 5 -0.1966550 -0.104446   0.168875  0.373712   0.356984  0.2672210  -0.2675250
## 6  2.2520500 -9.838060 -10.543700 -2.177630 -18.570900 -4.9561400 -10.5035000
##        PC672      PC673      PC674      PC675      PC676      PC677       PC678
## 1  0.3658390 -0.2220490 -0.4542430 -0.1059320  0.0668257 -0.1227860 -0.07169060
## 2 -0.3572560  0.3813790 -0.0584039  0.3682240 -0.0438910 -0.2437420 -0.62563900
## 3  0.0525116  1.1491300 -0.0821967  0.0657877 -0.3136410  1.0217800 -0.12046900
## 4  0.4529030  0.1608160  0.0197362 -0.6737840 -0.4331220  0.0589937  0.28747700
## 5 -0.3019860  0.1766060  0.2178910  0.6849810  0.3028740 -0.8538600 -0.00102422
## 6  1.3322600  0.0338846 -2.9810000 -1.6761600  1.4657700 -5.6600100  1.48465000
##        PC679      PC680      PC681      PC682      PC683      PC684      PC685
## 1 -0.1012020  0.0386973  0.2632050  0.1172300  0.3892970  0.0890863  0.3201930
## 2 -0.1120630 -0.0705258 -0.0886319 -0.1414360  0.0814943 -0.0653639  0.1095060
## 3  0.0830344  0.2721590 -0.3359110 -0.3258950 -0.0744264 -0.1506270 -0.1918520
## 4 -0.1225620  0.1521610 -0.1445910  0.0501605 -0.2175600 -0.0702993 -0.0326227
## 5  0.3900610 -0.3917050 -0.4784030  0.1387840 -0.2413210 -0.1031130  0.1312310
## 6 -0.3688850  0.3400610  0.4379240  0.9850670 -0.5071560 -0.6654800 -0.7090970
##        PC686      PC687        PC688 Individual          region Pop_City
## 1  0.0385472  0.1949580 -6.61228e-07        801 Southern Europe   Durres
## 2 -0.2263910 -0.0350247 -6.61228e-07        802 Southern Europe   Durres
## 3  0.3104160 -0.3268830 -6.61228e-07        803 Southern Europe   Durres
## 4 -0.1771030  0.0535150 -6.61228e-07        804 Southern Europe   Durres
## 5  0.0731578  0.0622075 -6.61228e-07        805 Southern Europe   Durres
## 6  0.6414640  0.0648584 -6.61228e-07        806 Southern Europe   Durres
##   Country Latitude Longitude Continent Year          Region   Subregion order
## 1 Albania 41.29704  19.50373    Europe 2018 Southern Europe East Europe    25
## 2 Albania 41.29704  19.50373    Europe 2018 Southern Europe East Europe    25
## 3 Albania 41.29704  19.50373    Europe 2018 Southern Europe East Europe    25
## 4 Albania 41.29704  19.50373    Europe 2018 Southern Europe East Europe    25
## 5 Albania 41.29704  19.50373    Europe 2018 Southern Europe East Europe    25
## 6 Albania 41.29704  19.50373    Europe 2018 Southern Europe East Europe    25

3.2 Create PCA plots

PCs 1 & 2

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc2_r01.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 and PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc3_r01.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

3.3 Re-do PCA plot with new colors

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 73

Check the regions

unique(sampling_loc$Region)
##  [1] "North America"   "South America"   "Western Europe"  "Southern Europe"
##  [5] "Eastern Europe"  "East Asia"       "South Asia"      "Southeast Asia" 
##  [9] "West Africa"     "East Africa"     "Indian Ocean"    "Central Africa" 
## [13] "North Africa"    "Caribbean"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1     PC2     PC3      PC4      PC5      PC6      PC7
## 1        ALD 11.3046 3.85130 19.7833 1.687230 -31.9742 -7.03582  2.87450
## 2        ALD 11.3286 4.09234 21.6678 2.240930 -35.6509 -6.67780  1.30074
## 3        ALD 10.9877 2.96010 27.1215 1.722600 -27.0213 -8.93229  4.56509
## 4        ALD 12.5399 3.70106 23.5571 1.923170 -32.8265 -4.32701 -0.54051
## 5        ALD 10.6210 6.13863 18.5964 0.106578 -28.5055 -4.24365 -2.14243
## 6        ALD 10.3700 3.24124 23.0999 0.624742 -34.5679 -8.77115 -1.64540
##        PC8       PC9       PC10      PC11      PC12     PC13       PC14
## 1  8.76051 -2.390600 -1.1133700  1.409550 -2.432570  2.21439  0.0831294
## 2 10.53680  0.572231 -0.0447769  5.148600  1.885100  6.72702  0.3359080
## 3 12.34000  2.083830 -2.5534900  5.418010 -3.295510  6.81957 -2.2182600
## 4  8.67068  3.930570 -1.8172500 -0.958173  3.130710  1.46826  3.9380100
## 5  6.97025  1.488870 -2.2270600  2.202370 -1.782360 -3.09520 -2.3885500
## 6  8.84329 -1.938790 -1.5921900  6.365010  0.288914  5.53801 -2.6206200
##        PC15     PC16      PC17      PC18      PC19     PC20      PC21     PC22
## 1  1.591980 -3.89981  0.846729 -1.512010  1.275050  5.20659  0.355472  6.59573
## 2  2.506320 -1.82679  4.219520 -2.663210  0.722739  6.76211  4.321160  5.56285
## 3 -0.176634 -3.29138  3.994920 -3.303390 -3.444090  4.72517  2.293660  1.81743
## 4  1.702520  1.34122 -0.943790  0.525896 -0.857543 -2.31975 -2.554890 -1.97683
## 5 -0.245045 -4.12400  2.278240 -4.605470  4.697470  1.25214  3.550810  7.89109
## 6  3.673660 -1.73236  1.017960 -2.620330 -0.748756  4.38833 -0.176724  5.28283
##         PC23      PC24      PC25      PC26      PC27     PC28     PC29     PC30
## 1  3.2310000 -3.782110  0.343321  3.452750 -0.397645  8.59238 -6.38829  2.67176
## 2  0.1545930 -0.973830  0.824802  5.002070 -1.803110  7.84339 -2.56195  2.51685
## 3 -0.3103470  0.104312  0.789867  5.413510 -3.317490 11.40880 -3.64877  3.30967
## 4  0.0302859 -0.871619  2.124920  5.503500  0.533652 -1.33150 -3.21115  2.07990
## 5  5.7531100  2.564260 -1.794590 -0.455631 -0.109703 12.61800  7.14574 -8.21520
## 6  2.0990500 -0.298802 -0.988705  7.396710  0.401363  5.97424 -9.19427 -3.69260
##       PC31      PC32      PC33    PC34     PC35     PC36     PC37     PC38
## 1  6.86863 -1.089500  -4.44339 6.67840 -3.42491 -4.82034  8.93957 -4.88268
## 2  7.74545  2.853570  -3.56519 4.42324 -1.18201 -3.85741  4.39299 -6.69470
## 3  8.60304  6.357500  -7.46373 4.15771 -4.83301 -2.46457  7.54084 -8.38108
## 4 -4.69375 -0.207922   2.10898 2.65389  1.70503  5.73929 -4.27603 -3.66764
## 5 -6.60411  4.734720 -12.04430 4.07524 -3.22002 -2.26138  4.88504 -6.55756
## 6  4.72237 -0.926283  -3.27779 5.47676 -1.57894 -8.10418  7.07028  7.59312
##       PC39     PC40      PC41      PC42      PC43     PC44     PC45       PC46
## 1 -6.78503 -4.42121   3.59483 -2.665010 -3.630720 15.46030 -3.96144 -6.4089000
## 2 -7.26412 -5.50867   7.42830 -2.154760 -0.773832  7.63320 -1.18746 -6.5392000
## 3 -5.62695 -5.01390   8.26299  1.203870  0.940659  1.56725  1.96505  0.5932230
## 4 -8.27641 -2.11220   7.04648 -2.259360  3.718930  1.64267 -7.67578 -0.5220810
## 5 -2.90173  0.32610  -3.06623 -0.994856 -8.462040  9.02437  1.05418  0.0444795
## 6  2.13255  8.45184 -12.27810  3.350630  0.377129 13.13020 -2.84936 -3.6098600
##       PC47      PC48      PC49      PC50     PC51       PC52      PC53
## 1 -3.71097 -1.082220 -2.766790  7.672670 12.30180 -3.1054600  1.924130
## 2 -2.01765  1.189410 -5.822020  4.647830  4.41900  0.0153929 -0.433726
## 3  3.39743  0.471181 -5.686080  0.208139  1.09238  0.1296940  7.671860
## 4  7.34806  0.639311 -7.172720 -1.552730  1.99029 -1.8731700 -3.060680
## 5 -5.90926  4.810760  1.229390  1.441610  6.24433 -0.0726317  0.420500
## 6 -4.74439  8.944300 -0.487939 -3.048420  2.38544  0.9313740 -7.630770
##        PC54      PC55      PC56      PC57       PC58      PC59     PC60
## 1 -0.683836  3.345480 17.363200 -1.485730 -0.0259309   1.56506  1.23083
## 2 -4.033930  2.998030 13.485500 -4.554240  2.5985500   3.97646 -2.31647
## 3 -3.867050 -0.788444 11.591500 -0.836219  2.9079800   4.70301  1.73630
## 4  4.549290  1.211220 -0.138582 -1.870110 -0.9434830 -12.62790 -5.61055
## 5  0.181434  4.545250  3.638140 -0.995323 -2.4282200  -3.61029  8.37504
## 6 -5.363230 -2.720780 -2.805080  5.570260 -0.5126940   8.06162  3.68012
##        PC61      PC62      PC63      PC64      PC65      PC66      PC67
## 1  12.36460   8.14968  4.424100  0.675101 -4.946100  0.666866 -0.539953
## 2   7.21885   5.87628  2.239920 -5.329600 -4.809330  0.698679 -3.867710
## 3  10.40760   5.37401  5.918490 -2.324100 -5.898300 -4.353950 -3.038200
## 4   3.67099   2.43420  0.184676  2.806220 -6.231890 -3.045850  1.087490
## 5   4.05643   1.76671  6.028750  1.079830 -0.638482 -0.808931 -8.573490
## 6 -12.93130 -12.34680 -1.453650 -9.402500 25.534300  2.088730  3.960290
##        PC68     PC69      PC70      PC71      PC72        PC73     PC74
## 1  3.626120  3.15212  3.178980 -0.322655   8.34466  -0.0736454  3.20536
## 2  6.192960 -1.63845  5.300660 -6.972940  10.38190  -3.4281300  2.21707
## 3  0.790238 -1.59328 10.640200 -4.497410   7.29813  -3.0353400  5.61416
## 4 -0.308131 -3.43526 -4.851840 -4.036010   8.68017  -4.5135500 -4.00738
## 5  4.745270  6.12391  0.487623 -5.248270   2.22806   4.2850400 -1.88139
## 6  5.207100  4.08905 -9.177160 14.790700 -10.49410 -14.7545000 -1.28016
##        PC75       PC76      PC77      PC78       PC79      PC80      PC81
## 1  1.560740 -1.1318000  3.079280 -2.294070   0.330213 -0.849971   0.23262
## 2  2.419960 -3.5366100 -2.952500 -0.300429   1.180380 -3.288850   2.46454
## 3  4.474900 -3.2787500  2.369970 -4.133800   6.488080 -0.821653  -1.46916
## 4 -2.897600  0.0746116 -6.816260 -1.939580   4.969250 -7.470980   1.76947
## 5 -0.452728 -3.6357600 -0.414089 -4.228540   2.659440 -3.243120  -6.26291
## 6  3.161660  7.8113200 21.747800  2.844890 -10.529600 -1.050340 -10.94520
##         PC82      PC83      PC84     PC85      PC86     PC87      PC88
## 1   3.676170 -2.114840  3.300130  9.77398 -0.501383  4.24391  2.821420
## 2  -2.625270  5.738870  7.397860  7.17553 -3.859310  4.76020 -4.966830
## 3   1.386140  4.294830 10.229400  1.45990 -6.178910 -2.86423 -3.815870
## 4   6.702490  2.631350 -0.272547  4.98237 -2.535640  2.31096 -3.023310
## 5   0.504438  0.151063 -0.428986  6.41210  4.387500  4.84532 -0.855451
## 6 -10.615300 -3.035320 -1.516010 -3.19946 -3.781570 -2.63837 -1.979470
##        PC89       PC90     PC91      PC92      PC93      PC94      PC95
## 1  0.881157  -1.591860  5.38173  2.853400  2.381060 -1.304460 -5.221110
## 2 -4.828930  -4.081890 -1.09502 -0.126301  3.998150  5.205870  1.319620
## 3 -6.985200  -5.026990 -4.65174 -5.034190  0.994824 12.556600  4.545030
## 4 -5.524060   0.811997  1.02417 -6.209200 -0.934143  3.319320 -1.919300
## 5 -1.640050   1.103730  3.49897 -2.529840  2.613670  3.429880 -5.707880
## 6 -9.966890 -12.861300  5.42910  0.589571  2.157570  0.951197 -0.826276
##       PC96      PC97       PC98       PC99     PC100     PC101    PC102
## 1  1.82553  2.370550  1.8149700 -0.9901620 -2.541920 -0.679209 -3.58714
## 2 -2.39237  1.638400  4.4738900  2.6497300  1.116190 -1.692700  1.95229
## 3 -5.22245 10.461600 -0.8455530  0.0263679  0.505576 -7.371870  1.94227
## 4  3.89914  4.791710  1.3550500  1.6973700 -1.274580  5.993940 -5.88560
## 5 -2.82533  0.932514  5.2839700  7.5794400  3.857620  1.928860  1.70852
## 6 -4.32066 -0.115444  0.0519455 -3.6473500  0.416684  2.038670  3.52975
##       PC103    PC104     PC105     PC106      PC107    PC108     PC109    PC110
## 1 -1.242230  1.37588 -0.282432  1.009850  4.5056900 3.105340 -3.378790  3.24386
## 2 -0.388432 -4.76404  0.346802 -2.296200 -5.0945900 0.967458  4.496360 -1.54846
## 3  5.974930  4.55647 -1.570030 -0.563826 -0.1233150 2.944420 -1.330780 -6.44084
## 4  0.486571 -5.57529  1.298180 -5.440210  0.4837610 3.749750  0.621117 -6.45818
## 5  5.042380 -3.03734 12.192400  3.364350  0.0474162 1.490450 -3.737110  6.32788
## 6 -4.821870 -4.92392 -1.975290  2.803510  6.6819200 4.907960  3.215450 -5.80926
##       PC111     PC112     PC113    PC114    PC115     PC116     PC117
## 1  1.831410  -3.56423 -0.829310 -6.09407 -1.41875  3.763020 -1.236830
## 2 -0.123996   2.05858  2.182990 -5.22162 -2.11596 -0.160551  1.039900
## 3 -2.496910  -2.32711  5.238510 -2.03523 -2.69885  7.127050  0.156205
## 4  1.704910  10.35100 -0.742679  7.69977 -1.23606 -3.155350 11.828700
## 5  5.810880   7.21787  3.713050 -4.69280  3.66361 -2.726250 -5.317630
## 6  8.105620 -10.90690 -6.465960 -1.26854 -1.50106 -5.159560  8.907690
##         PC118     PC119     PC120     PC121    PC122     PC123    PC124
## 1 -6.16092000  1.699850  0.186924 -4.391350  3.11780 -0.422223  4.95131
## 2 -2.08942000 -1.546120  3.111010 -0.949337 -1.59902  1.958100  1.83929
## 3 -0.34188400  0.749496  2.910160  1.304050  4.03203 -1.654230  7.92005
## 4 -2.17334000 -4.921240  1.410030 -7.839490  4.07305 -2.265450 -8.38001
## 5  0.00684361 -3.118290  5.235230  6.032060  7.83041 -1.207320 -0.43200
## 6  4.07670000  5.702270 -3.817560  6.174360 -4.08069 -2.418970 -0.97293
##        PC125     PC126     PC127    PC128    PC129      PC130     PC131
## 1   0.185832  0.261899 -6.282550  5.91919  6.23795  7.0226900  2.535890
## 2  -0.580664  5.848520 -1.924010  8.06666 -2.68879 -1.5571300  0.298089
## 3   7.351420  6.280290 -0.995556  7.52393 -5.38622 -0.0920857 -5.297550
## 4  -5.969310 -6.378260 -6.014520 -4.39762  3.20391  7.1223200  4.967480
## 5 -10.860400 -6.738010  0.320248  3.82667 -1.24171 -3.7832800  5.202910
## 6  -1.569440  5.852960 -0.186841  3.44772 -1.20364 -0.4914640 -0.200503
##       PC132    PC133      PC134     PC135     PC136     PC137      PC138
## 1 -7.091640 -4.09787 -0.9431710  3.730210 -0.179847 -2.858830   1.118670
## 2 -0.353264 -6.52274  0.0324326  3.545020  0.543437 -0.982884  -3.196480
## 3  1.967810 -6.04081 -2.9640000  5.207930  3.043190 -2.378460  -0.665147
## 4 -2.844440  1.84523  2.1313300  1.505590  6.049240 -3.552260 -10.050800
## 5 -3.937810  9.15740 -4.9289600 -0.265273  0.282665 -2.600760  -0.921349
## 6 -6.911200  1.92793  1.3839200 -0.159293 -4.199880 -3.750490  -2.533260
##       PC139     PC140    PC141     PC142     PC143    PC144     PC145     PC146
## 1  4.263590  6.099980  3.22012 -8.334070  1.299490  7.02878  0.628793  1.854180
## 2 -0.703574  7.821730 -2.22026 -1.010210  1.234940 -3.78188  2.323930 -3.586940
## 3 -2.674500  9.179140 -1.15101 -2.484550 -7.111730 -1.52159  6.152850 -8.405590
## 4 -5.241910  3.592440  5.13412  0.565826 -1.087230 -4.42264  1.112270 -0.866679
## 5 -4.036760  0.360688 -2.64851 -1.479770  3.857360  6.19061 -1.356600  2.374540
## 6  5.005410 -0.217569  3.18881 -5.517830  0.507129 -2.31484  5.017880  1.484370
##       PC147    PC148    PC149      PC150     PC151    PC152     PC153    PC154
## 1 -3.169590  3.90917  5.48849  5.5814600  2.969340 -1.10906  0.375404 -1.01594
## 2  0.941444 -1.69402 -1.08083 -0.8090390  5.699030  1.34852 -4.540270 -2.32031
## 3 -3.784180 -5.59783 -1.90505 -1.8694100  2.849310  5.91475 -3.382540 -2.34757
## 4 -3.553470 -3.55506 -2.91962 -0.0703231  0.377022  1.92900  1.305020 -7.54519
## 5 12.438800  6.40531 -1.03165  3.5773900  4.051250  0.97918  0.427298  6.82716
## 6 -7.824840 -2.85133  2.15418  5.0165100 -2.573520 -3.33185  1.175740  5.80516
##       PC155     PC156     PC157    PC158     PC159    PC160    PC161      PC162
## 1 -3.194590  6.344740 -4.716410  5.56698 -0.229742 -1.12558 -2.78257 -1.1763900
## 2 -2.299270  4.739130 -3.311780 -3.87053 -5.494820 -1.99021  2.77757 -6.3429000
## 3  0.131539 -0.691950  2.945900 -9.90679  0.222635 -4.41852  4.96841 -0.5344060
## 4  6.693760 -0.412623 -1.045450  2.78292  0.401828 -1.29800 -2.12093  0.0229272
## 5  0.645226  6.898780 -0.239179 -1.87662 -7.712930 -8.70063 -7.45579  3.9954700
## 6 -8.188180 -0.502378 -0.606133  8.85862 -6.430770  3.95163  3.67135  4.9203100
##       PC163     PC164     PC165     PC166       PC167      PC168     PC169
## 1  2.407920  0.504277 -6.207460 -1.821530 -1.46783000 -2.2010600  0.284804
## 2  0.160678  9.304710 -2.631630 -5.545960 -0.14424700 -5.9512900  4.902430
## 3  1.266740  0.489838 -7.443820  0.357423 -0.00106332 -4.8939300  0.550162
## 4  1.272300 -9.188160  5.111440 -3.451410 -3.36530000 -1.3892100 -3.437740
## 5 -5.355580  3.933350  0.553361  1.089580  4.15026000 -0.0174637  0.393706
## 6  2.439100  1.310280  3.313470  5.025000  3.10215000  1.9033900  1.265480
##      PC170     PC171     PC172    PC173    PC174     PC175     PC176     PC177
## 1 -2.06408  0.249062  3.206130  2.51127 -1.57129  2.806100  2.328480  0.940548
## 2 -2.31194 -3.141630  0.382582  2.01567  5.37382  0.446184  0.977171  1.229680
## 3 -1.36361 -0.628543 -3.132420  5.85005 -1.13194 -4.795800 -0.509625 -8.482140
## 4  9.70610  0.776337 10.806700 -3.03668  3.68210  2.316360 -1.512890  1.518280
## 5  2.00749 10.847100  4.886560 -2.65903 -5.96778 -5.692680  4.891790 -5.440430
## 6  3.31191 -1.190150  3.899690 -2.94469 -2.81133 -2.711750 -2.510280  3.611460
##         PC178    PC179      PC180     PC181     PC182     PC183     PC184
## 1 -0.00200687 -4.65122  2.3894400 -7.303210 -2.759210  2.754250  5.157660
## 2 -0.74188400  4.72168 -1.3900200 -8.020900 -0.766446  0.408351  3.968530
## 3  1.17209000  4.64376 -4.5449300 -1.867810 -5.384910  7.567070 -7.286060
## 4  8.23867000 -7.06576  2.5939400 -2.129160  1.646700 -3.702110 -0.560026
## 5 -3.79358000  6.30434 -3.0662400  4.405560 -3.986570 -2.528300  3.393280
## 6 -0.15949300  0.36457 -0.0979605 -0.803125 -3.550490  1.204050  5.197190
##        PC185     PC186    PC187     PC188    PC189     PC190    PC191
## 1  -5.122260 -2.875810  1.66973 -2.690040 -2.36216 -7.727470  3.27333
## 2  -5.199090 -1.588930  4.85938  5.565460  2.97993 -1.850640 -1.77663
## 3   3.780100  0.199628  1.42551  0.608429 -1.16493  1.421940 -0.28420
## 4 -11.393300 -8.025690 -3.60762  6.154500 -3.88123 -0.491429  4.93060
## 5  -0.683104  0.265958 -7.64458  0.818755  4.07498 -6.294480 -7.87804
## 6   3.152440  0.569911 -1.26830 -0.904214 -0.70014 -1.357030 -2.88737
##        PC192     PC193    PC194    PC195     PC196    PC197      PC198
## 1 -10.970500  3.395710  2.02413 -1.57175 -3.547260  1.39563 -3.6125500
## 2  -1.240410  2.429520 -3.98737 -2.97395  1.090970 -1.02960 -0.0936227
## 3   2.198030  1.188300  1.26478  4.66170  0.524950 -2.12740  6.8943600
## 4  -0.355432 -0.614295  3.02340 -1.12641 -7.986670 -2.69594 -2.7320400
## 5   2.761850  3.336300  1.12680 -3.44725  0.689305 -1.93704  1.8109300
## 6   1.778270  0.410245 -0.27013  1.71417 -1.849740 -4.71803  0.0304301
##        PC199      PC200    PC201     PC202     PC203    PC204     PC205
## 1  4.3506500 -0.1842840 -2.35063  2.435380  0.592889 -1.75159 -3.861130
## 2 -3.7102100  1.1534600  2.31505  3.295890  4.785610 -6.55696 -4.182160
## 3 -1.8342000 -0.0591787  1.43649 -1.125470 -1.176830 -5.18465  0.556054
## 4  5.9963000  4.2321100  1.46582  0.689867 -4.254750  6.18998  4.144820
## 5 -6.4662400  3.6972300 -3.45097 -5.285000 -5.222300 -5.51265  2.109890
## 6  0.0379097  1.2963200 -5.46168 -2.748510  7.776450  4.47502  4.427750
##      PC206    PC207     PC208     PC209     PC210     PC211    PC212     PC213
## 1  1.29632  6.80115  3.724990 -5.679960  7.245660  1.430160 -3.92452  2.438540
## 2 -3.83494  1.33549 -3.521190 -1.570180  6.505040 -0.768251 -1.45241  2.828640
## 3 -5.16227  1.28890  1.088420 -2.014510 -1.789410  6.007320 -1.91646 -0.565721
## 4  0.99046 -1.30097 -2.315050 -5.949760  6.741770 -2.065300  2.48424 -1.596290
## 5 -1.61388  2.94504 -0.124489 -0.257321 11.537800  0.399082 -6.19094 -6.520800
## 6  1.93542 -4.99764  4.708590 -3.235450  0.620947  0.889299 -2.53503  0.928740
##       PC214     PC215    PC216     PC217    PC218    PC219     PC220     PC221
## 1  4.028970 -4.886490  2.92810  5.481230 -3.43628 -8.39159  0.176600 -0.198451
## 2 -0.305079 -1.914920 -0.79662 -1.815350 -1.09437 -2.52244 -3.347810 -1.306930
## 3 -2.319340  1.040040 -1.62346 -3.834280  0.84389  1.46661  1.267470  1.828480
## 4 -0.255899  0.965714  7.46661  3.159170  5.80024  4.63414 -6.449860  1.305460
## 5  0.503550  7.551410 -2.29599  5.001530  4.75892  8.14829  4.804990 -2.737450
## 6 -3.560970 -0.402145 -2.13385  0.128978 -2.61625  4.07387  0.670273  0.929313
##       PC222     PC223     PC224    PC225    PC226      PC227       PC228
## 1  3.137110 -5.921780 -3.046360 -1.67911  1.24058 -7.4721300 -0.48883500
## 2  5.360180  4.655510 -0.385307 -1.37559 -1.85719 -5.6091900 -0.26706700
## 3 -2.584560 -0.296809  5.357860  4.38550 -2.74253 -1.7924800  0.00585376
## 4 -0.462546  5.945220  1.681390  2.55162 -2.71198 -0.0619051  0.59627000
## 5  5.466080 -3.546580  8.015470  5.28112  1.35408 -0.1151420 -0.39073600
## 6 -3.499260  1.654170 -0.164668  4.23481 -1.47545  3.8106900 -0.67177800
##       PC229     PC230    PC231    PC232     PC233     PC234     PC235     PC236
## 1 -0.484416 -6.591900  7.10378  1.20679 -0.953984  1.795180 -3.067130 -0.185464
## 2  5.944490  0.946733 -3.46878 -1.51164  0.568447  2.588420 -0.633609  6.386770
## 3  0.472092  2.519760  1.65108 -1.18434 -1.810870  2.063490 -0.997746 -1.331900
## 4  5.414530 -4.616250 -5.22295  3.60925 -1.318590 -5.395360  0.213695 -0.929498
## 5  1.747560 -3.011750 -3.00410  0.82996  0.798574  0.862193  2.458870 -4.114840
## 6 -1.438900  4.061510 -1.97774  6.04377  2.038440 -1.225040 -0.970757  2.346040
##       PC237     PC238     PC239      PC240    PC241     PC242    PC243
## 1  -2.60627  1.724900 -3.233350 -1.8882900 -2.71458 -1.530650 -1.79883
## 2  -1.67193 -5.356220 -3.979290 -4.1170300 -2.96122  5.222450  4.06640
## 3   2.33019 -0.447266 -3.185630  0.0358097  8.08531  0.866774 -1.22115
## 4 -11.29090  6.735320  5.308680 -0.9669070 -3.06053  1.807940  4.24437
## 5   1.17821  3.636750  0.234708 -6.4126800 -5.04591 -1.722030  6.87473
## 6  -1.91458 -5.204140  2.415640 -1.8898000 -2.21064 -2.173030 -1.47902
##       PC244     PC245    PC246     PC247      PC248    PC249    PC250     PC251
## 1 -0.141661 -3.046470 -5.59169  2.374930  3.7825000  7.28198 -1.39082 -2.122820
## 2 -2.937560 -3.264400  1.04026  1.934550 -4.5539700 -3.65782 -2.21004  1.590630
## 3 -0.158989  0.424500  2.31887 -0.986673 -0.6495400 -4.43763  1.94750 -0.639691
## 4 -0.387815 -4.758520 -5.30704 -0.800842 -3.1182100 -1.80576 -4.37313 -1.730690
## 5 -0.443691  2.042780 -4.16313 -2.437480 -1.8216800  1.89389 -3.22134  0.903353
## 6  2.181070 -0.869642  1.58744  3.757840  0.0782487 -2.07095 -5.05006  1.444780
##        PC252      PC253     PC254     PC255     PC256     PC257     PC258
## 1 -3.1109100 -0.0989007 -3.984490 -0.380065  0.773690  5.081620  3.796560
## 2 -0.0900249 -3.1492800  0.945347 -0.316746  2.787110  4.558370 -0.148475
## 3  1.8735200  5.0584200 -0.611032 -1.920500  3.709800 -0.484981 -5.442160
## 4 -4.2312100 -7.2820700  0.406724 -0.858350  4.568340 -1.386100  2.992070
## 5  8.4939100  4.2626500 -0.487946 -0.449387 -1.182860 -0.322571  6.044300
## 6 -2.1517800  0.5775150 -0.378945  2.891990  0.450424  0.249205  0.817690
##       PC259     PC260     PC261     PC262    PC263    PC264     PC265     PC266
## 1  4.554870 -0.180526  3.435920  0.225784  1.60653  3.76099  2.871990 -1.302680
## 2  1.288960 -3.907130 -1.382680  2.555630 -6.02580  1.71128  4.870660  7.403940
## 3  4.021820 -2.085190  4.568090 -1.944590 -2.48828 -3.21620  1.575480 -0.828938
## 4 -0.230136  3.909410 -3.996590  1.347380  3.39100 -2.32004  3.366520 -3.100750
## 5 -2.199270 -6.744820 -6.012390 -0.999186  3.85779  3.65402 -5.079180  0.360558
## 6  0.529356 -3.363430 -0.787424  0.286810 -1.05157 -1.30635 -0.712065  1.261690
##       PC267     PC268      PC269     PC270     PC271    PC272     PC273
## 1 -0.704708  4.322350 -1.1258300 -1.179440  0.675343  3.01902  1.970360
## 2  5.160620  3.635520 -2.4468800 -0.493435  3.050280 -1.56081 -8.733250
## 3 -2.105030 -0.509731 -0.0109466 -1.482940 -2.779740 -6.08347  1.363990
## 4  4.917970  4.606890  3.2853500 -1.722580 -1.324390  1.72342  5.424210
## 5 -2.559020  1.384370 -2.3572400  1.784000  3.650560 -2.38102 -2.517820
## 6  2.473910  0.508406  2.7605000 -3.296760  0.279406  1.45649  0.701155
##       PC274     PC275      PC276     PC277      PC278      PC279     PC280
## 1  1.076720 -6.456570 -4.3610100  2.232230  3.7653100  2.5232200 -2.577250
## 2 -2.348860  1.316240 -3.4896600  1.010140  3.5050800  1.3732900  1.907470
## 3  1.627800 -2.196950  5.0531400  3.953960  0.1296010  0.7704000 -1.831710
## 4  0.922302  2.941030  0.0113377 -2.077920  0.0954494  4.3804500  6.568490
## 5 -1.010390  0.577939  1.6901900 -1.417940 -0.2320480  2.0787400  3.394350
## 6  1.942790 -0.784017  1.2404900  0.799635 -1.3125100 -0.0911067  0.593806
##       PC281    PC282     PC283     PC284    PC285     PC286     PC287     PC288
## 1 -1.167470 -7.32710 -1.757210  6.263120  6.68303 -2.738220 -1.282220  9.083170
## 2  6.050040  5.06460  1.867210  2.773310  3.04143  1.122050  2.125570  0.786617
## 3 -3.118240  2.85255 -1.304560 -1.839270 -3.27678  2.887690  4.543650 -4.966050
## 4 -0.755247 -2.41956 -0.177232 -0.785872  2.57769 -4.062370 -2.953830  0.331381
## 5 -0.167794 -5.00791 -0.104553 -1.051420  1.41791 -0.746397 -1.887730  0.908154
## 6 -0.419569 -2.21488  2.496790 -1.429140 -1.06806 -1.826130  0.880564  2.172490
##      PC289    PC290     PC291     PC292     PC293     PC294     PC295
## 1 -2.24602  3.25972  3.101570  0.906306  3.789420 -0.418502 -0.835618
## 2 -1.97198  3.50750  0.107420 -1.740950 -1.272470 -2.388470  0.110258
## 3  3.27843 -6.82619  1.227160 -0.963813 -0.240709  0.520258  5.565670
## 4  4.31911 -2.41201 -0.813299 -2.981570  4.501630  2.012100 -0.720453
## 5 -2.20043  7.65197 -0.123663  0.949846  5.226230 -1.838180  5.166070
## 6 -1.55086  1.81220  2.017660 -0.907038  1.868770 -2.701710  3.248320
##        PC296     PC297     PC298     PC299     PC300      PC301     PC302
## 1  4.8994000 -0.354399 -2.316600  1.198970  7.127490 -6.4063800 -9.698180
## 2  4.2534900 -1.765790  4.649910 -0.824233 -6.574470 -5.6218900  3.017050
## 3  1.0221600  1.172740 -0.620871  1.926090  1.182940  1.2398100 -1.562080
## 4  3.5535700  2.434290  2.813040 -0.250536 -2.751430  0.4080740  4.008350
## 5 -3.9412800  7.063680  1.152600  1.067250  0.957400  6.0361900 -5.040300
## 6  0.0479087 -0.334840  1.281160 -1.611780  0.510062  0.0803322  0.585129
##       PC303      PC304     PC305     PC306      PC307     PC308     PC309
## 1 -1.244670 -0.0359111  3.494620  5.136460 -1.0652500 -5.897530 -2.196240
## 2  0.880336 -0.2842230 -5.833970  3.408550  2.8466200 -0.746355  3.100230
## 3 -3.920340 -2.6214400  5.745940  0.144089 -0.0390333  2.352300  0.797683
## 4 -3.766600  2.7632300 -2.127930 -1.375800  6.4100900 -4.232740  5.516660
## 5  2.002890  6.0349300 -2.486370  2.179430  9.5686900 -1.446080 -2.554080
## 6  2.285350 -0.0504606 -0.860408  1.275890 -0.3145560  0.263111  3.049160
##       PC310     PC311     PC312    PC313     PC314     PC315     PC316
## 1 -4.529300 -0.596955 -6.022760 -1.98718 -3.854230 -4.458500  4.820600
## 2 -1.279880 -2.394470 -2.733360  2.29759  6.911940  2.783710  3.275130
## 3  1.875160 -0.843038 -3.966130 -3.25928 -0.861628 -1.571350  4.746350
## 4 -1.148870 -1.579430  2.904310  2.57539 -1.109820 -0.791526 -0.364149
## 5 -5.018320  1.121770  7.693330 -1.24061  0.587698 -0.272952 -1.099740
## 6  0.552704  1.666260  0.578342 -1.21995  1.913870  1.801810 -0.304175
##       PC317     PC318     PC319     PC320    PC321     PC322     PC323
## 1  2.317450 -3.031970 -3.967150 -3.718840 -1.18691  3.754420 -2.247830
## 2  6.108240  0.961631 -1.638030  1.563570  2.16924 -3.449600  4.716840
## 3  1.737610  0.195350  4.031240 -1.256110 -4.96456 -0.360506  0.182904
## 4 -1.039470  0.732611 -1.700800  1.173080  4.33882  3.042740 -2.074310
## 5 -2.445130 -2.092750 -3.781580  7.753250  1.51262 -1.001810  0.216068
## 6  0.484572  0.124960  0.254165 -0.590667 -1.23423  1.284490  1.421910
##       PC324    PC325     PC326     PC327     PC328     PC329    PC330     PC331
## 1 -2.407790  3.52872  3.019310 -5.046920  1.570320  3.542260 -4.17224 -0.764133
## 2  2.885370  1.60306  5.788690 -0.411278 -5.938110 -3.153700  1.52610  8.171960
## 3  3.217630 -2.05684  2.295780  2.350170  7.088650 -2.384220 -6.56339  3.529360
## 4  3.601730  3.30243  0.271778 -0.458454 -0.321689 -1.276820 10.04940 -5.558870
## 5 -1.756110  7.21950 -7.370730  0.722412 -1.418190  4.488210  5.81059 -1.090410
## 6 -0.140333 -2.03585 -0.620517  0.324934  2.739080 -0.184163 -1.30837 -1.856670
##       PC332      PC333     PC334     PC335     PC336     PC337     PC338
## 1  2.403320 -3.4056100  3.337870  4.087050 -0.919898  1.374020 -0.956720
## 2  0.186044 -2.6618000 -0.863452 -0.624972  2.173400  1.361700 -3.670690
## 3 -2.177060 -0.5525700 -3.144710  2.214470 -2.243890 -1.964570  3.324350
## 4  2.962830  1.4213600 -1.762620  1.362520  4.203550 -1.478030 -0.558561
## 5  0.588064  6.0416500  5.217510 -3.452600  1.496190  0.221104 -5.046460
## 6 -0.592151 -0.0223541 -1.813180  1.348970 -0.975891 -1.191660 -2.384790
##       PC339       PC340    PC341      PC342     PC343     PC344     PC345
## 1 -0.880226 -1.21509000  3.04682 -2.4142100 -5.935350 -1.983460 -4.654590
## 2 -0.605932 -0.00555504  3.36160 -2.1675600  1.394030 -1.566820  2.968850
## 3  4.231470 -4.78404000  3.28742 -0.0250219  0.753464 -4.882620 -1.118300
## 4 -2.447590 -4.44271000 -1.14934  8.4456400  2.939360 -0.727864 -1.694790
## 5 -2.852770 -1.32246000  1.09861 -0.1515270  6.350420  7.626440  0.222928
## 6  1.691910  2.79154000 -3.56493  4.8764800  1.760920  1.908600  0.218869
##       PC346    PC347     PC348     PC349       PC350     PC351    PC352
## 1 -0.580142 -5.47046 -2.364440  1.107250  4.27404000 -0.412252  3.67231
## 2  1.987670  4.96409 11.246600 -1.562750  5.19936000 -0.521341 -2.80807
## 3 -4.398510 -1.68173 -0.847626  0.085437  0.46251200 -2.057560  2.52434
## 4  0.857038  2.08038  1.765370 -1.220690 -1.86166000  3.361190  1.92769
## 5 -2.248230 -3.16584 -7.307590 -0.262214 -5.46398000  1.412240  1.14520
## 6  2.221290  2.57069  1.317720  0.953769 -0.00655931  0.540784  1.76074
##       PC353     PC354     PC355     PC356    PC357     PC358     PC359
## 1  1.871810 -6.511980 -0.661564  3.378200 -4.47432  0.941510 -2.664150
## 2  0.063551 -2.677530 -1.001650 -4.270190 -2.68414 -0.182748  0.708255
## 3 -1.812180  6.216740  3.139380  4.413630 -1.97647  5.228330  2.181090
## 4 -0.700273 -5.511250 -4.511440  1.528640 -2.73488  0.625332 -3.168350
## 5 -4.372240 -0.261217  0.172529  4.927270  1.26310 -3.894150  3.667970
## 6 -1.519310  4.241000 -1.619820  0.725635 -1.29752 -0.255093 -0.706519
##       PC360     PC361     PC362     PC363     PC364      PC365     PC366
## 1  2.231740  4.099790 -8.543030 -1.588750 -1.157290 -3.5193500 -2.105260
## 2  1.397130  1.526930 -5.251060  5.153830 -5.638690 -4.5892300  7.809180
## 3 -5.802150 -0.321080  1.538390 -1.007440 -0.692839  2.2761400  3.638130
## 4 -0.785163  0.439312 -4.591540  6.905630  2.763150 -3.7990900 -4.649150
## 5  3.697240  1.214710 -0.744555 -0.815993  5.262620  2.7329000 -5.562210
## 6 -0.707137 -1.655810  0.165163  1.824010  0.922399  0.0216574  0.838715
##        PC367     PC368     PC369     PC370     PC371     PC372    PC373
## 1 -1.6720900  4.487680 -4.314740 -3.528740  2.225810 -4.667440 1.028090
## 2  0.1706070  0.427944 -8.202190  1.707700 -0.317252  2.776320 4.323410
## 3  1.1027500  1.074630  0.550941 -1.084960  0.709983 -1.953010 2.460610
## 4 -1.0847500 -0.633538  2.947630  1.980230 -0.123283  3.328190 3.106650
## 5  4.8595200 -0.519760 -2.005060  3.654230  2.527530  0.186395 2.013940
## 6 -0.0637026  0.906861 -0.779790 -0.610078 -2.564690 -0.551038 0.510095
##      PC374     PC375     PC376      PC377     PC378     PC379     PC380
## 1  7.02319  1.738220  6.596490 -0.8094320 -2.618450 -3.024410 -1.680290
## 2 -7.89084 -0.738554 -1.583800 -1.5258300 -3.943280 -1.357440 -6.709730
## 3  4.21677 -2.482110 -4.478720 -0.0608105  0.197405  0.299719  1.448730
## 4 -3.33134  2.823940 -4.081030 -2.2697700 -3.363470 -1.840450  4.161030
## 5  2.88541  0.485885  0.580940 -1.7494500  6.343700  2.654310  4.209810
## 6 -5.10864  1.730910  0.818387 -0.3118950 -0.834581 -0.560287  0.823605
##       PC381    PC382     PC383     PC384     PC385     PC386     PC387
## 1  1.224140 -4.46823  4.560820  0.336982  3.222050 -0.128295 -3.128460
## 2  2.085980 -2.11413 -3.411630  6.556230  3.376970  0.324062  2.453470
## 3  4.953880  2.79445  2.917030 -2.244600 -0.713586 -0.365991  0.134078
## 4  0.742917 -3.21043 -0.491023  5.074570  0.285338  2.363070 -4.858370
## 5 -8.116330  4.33748  1.892020 -4.607370 -0.826007  2.118930 -1.398930
## 6  1.033700 -1.65589 -1.777830 -3.430340 -0.767556  1.481900  2.696470
##       PC388     PC389     PC390      PC391    PC392     PC393     PC394
## 1  0.939928  0.933622  0.669828  2.5207300  2.17117 -2.860700  6.196000
## 2 -0.037260  1.490700  0.419819  0.1439770 -4.44610 -2.577600 -6.390070
## 3 -3.199150 -5.518490  3.181760 -2.8136800  1.66992  2.044430 -0.187200
## 4  1.497960 -0.326636 -3.596580  2.3397100  3.57688  1.227180  0.812500
## 5  6.972460 -1.416870 -3.069460 -2.8932200 -5.67699 -4.580810  2.630930
## 6  0.110215 -1.793140  0.266512  0.0441029  2.89785 -0.887878 -0.778045
##       PC395     PC396     PC397     PC398     PC399    PC400      PC401
## 1 -2.982830 -3.082900 -1.915160  0.805651  0.620796  4.94050  2.0283400
## 2 -3.178690  3.264820 -0.390678  5.573150 -1.369010  2.10913 -1.7665300
## 3  4.499490 -1.544360 -0.836570  4.431710 -5.337080 -4.37793  0.2483010
## 4  5.035120  1.209630  0.238963 -3.609800  5.509770 -1.35605  0.7613310
## 5 -0.591322 -2.202240 -2.334450 -0.538381  0.351640 -9.57576 -0.0717029
## 6  0.426598 -0.399882  2.091850  2.806660 -0.307129  1.51197  0.2879930
##       PC402    PC403     PC404    PC405     PC406    PC407     PC408     PC409
## 1 -4.114920 -1.97979 -0.808980 -1.91079  4.511750 -3.82357 -0.799932 -0.149239
## 2 -0.183011  2.31346  3.158090 -6.78478  2.630460 -6.62612 -7.284580 -1.642940
## 3  1.537450  5.09322 -0.578961  5.01701 -6.217660  2.83876 -1.389460 -3.389160
## 4 -2.623490  2.60056  0.689956  5.17085 -0.133193 -1.47182 -2.351500  2.767990
## 5 -2.331520 -6.66658  2.385440  8.46534 -7.742310  2.81319  3.648790 -2.183510
## 6  2.455320  1.00331  4.438930  1.72310  0.462323 -1.11461 -1.513510  0.549190
##       PC410     PC411     PC412     PC413     PC414    PC415     PC416
## 1  1.180210  7.201860  5.997450  0.796744  6.778560 -6.71034  3.086720
## 2 -2.246620  3.939280  1.488150 -3.055610 -2.258990  6.62703 -2.355380
## 3  1.748630 -0.637145  1.767430 -1.559500  5.161070 -2.28945 -2.964570
## 4 -0.186485  2.244710 -4.674540  4.427060  3.618180  3.84153  2.612780
## 5 -6.562460  3.455780 -0.511899 -1.848520  0.333890  0.88684 -8.437590
## 6  1.467220  1.914170 -0.209509 -3.444480  0.877921 -1.44130 -0.203141
##       PC417     PC418     PC419      PC420     PC421     PC422     PC423
## 1 -5.270000  2.669880 -2.737900  0.0972643 -1.743620 -1.627760  5.633320
## 2  5.311830 -1.166070 -5.028760  2.5614900  0.544232  5.968320 -0.237397
## 3 -1.070970  6.167360 -1.685960  0.2800240 -0.776307 -2.840230  3.627820
## 4 -1.219700 -7.430310  1.134160  5.7804800  0.770270 -1.164310 -5.490850
## 5  0.311861  2.239030  2.096780 -3.4352900  5.097890 -3.305730  1.374780
## 6 -0.633924 -0.160888 -0.692334 -1.4510800 -0.791833 -0.747096 -3.763220
##       PC424     PC425     PC426       PC427     PC428     PC429     PC430
## 1 -1.059930  1.189740  7.897540 -1.28273000  3.842900 -2.966280  0.098208
## 2  4.908080 -7.822090 -3.670350  0.00254577 -5.044020 -1.718650  2.294610
## 3 -4.824760  0.765343 -2.504620  2.69992000  4.290130 -2.310330 -3.080490
## 4  0.409329  3.696870 -5.035110 -9.54874000 -4.796550 -3.665010  4.221900
## 5  0.175582 -3.201250 -0.861944  3.02739000  0.143382 -1.756010  5.042660
## 6 -1.932680  2.291090  0.951139  1.05760000 -1.540200  0.810951  2.420680
##       PC431     PC432     PC433     PC434      PC435     PC436     PC437
## 1 -6.697050  1.685660 -0.243431 -2.448690   3.431410  4.924440 -0.557961
## 2  2.249520  1.899320 -0.886325 -2.881820   4.404680  3.758520  2.652000
## 3  0.561962 -0.963899 -3.753970  0.255719   4.539240 -1.637550  1.301970
## 4 -7.683000 -5.204160  2.251000  2.005980 -11.448100 -4.724820 -3.365900
## 5  0.568323  1.611650  4.376980  1.930420  -7.120900 -3.577540  0.190141
## 6  2.721070 -0.976344 -0.615873  0.295888  -0.452162 -0.825643  0.570102
##       PC438     PC439     PC440     PC441    PC442     PC443      PC444
## 1  2.027550 -1.307050  6.075990  2.392040 -5.98478  3.875640 -1.5105900
## 2  1.606860  0.106339  1.904550  1.165110 -3.41790  5.343510 -2.1010700
## 3 -0.804997 -0.222015  0.398814 -2.446710 -3.29946  0.474361 -0.1870860
## 4  1.322950  2.981560 -0.178201  0.445028 -3.96408  1.481280 -6.1553900
## 5  1.096330  1.982040 -3.747420  2.849590  1.18340 -2.205130  6.1859400
## 6 -1.661220  0.460001 -4.163780  1.734380 -1.60864  1.425830  0.0177583
##        PC445     PC446     PC447      PC448      PC449     PC450       PC451
## 1  0.0543639 -3.483880 -1.758580  5.8307800 -12.800700  8.665420  2.57629000
## 2  0.4838180  0.842091 -1.087880  8.0486200   4.420650 -5.093200  0.72606300
## 3  4.9778000  3.322760 -4.199770  3.2852100   5.431240 -0.278039  3.61754000
## 4  1.0217200 -2.943360 -1.568410  3.5185400   3.588530  1.165150  1.37150000
## 5 -0.2619590  2.436830 -2.134500 -6.2773600  -0.162487 -7.516650  0.00819288
## 6 -0.5146880  2.233280 -0.798793 -0.0443748   0.574122 -2.278650 -2.66079000
##       PC452     PC453     PC454     PC455     PC456      PC457     PC458
## 1 -2.663170  1.336180 -3.876230  0.128933  1.647270 -2.4688800 -0.747510
## 2  0.464246 -0.880251  4.770560  1.805550  9.946890 -0.2074640  0.724752
## 3 -3.159810  1.534980 -1.749210 -2.494610 -1.520250  0.9077070 -4.305340
## 4  0.709957  5.805050  1.884610  1.677390 -0.649724  0.5085680 -2.370760
## 5  2.752160 -0.550547 -5.337650  0.399748 -5.600140  0.0219237  4.681990
## 6  1.325480  0.154809 -0.385594 -0.776716 -0.788787  2.1430000 -1.536190
##       PC459      PC460     PC461    PC462     PC463      PC464    PC465
## 1  2.270570 -5.4933200  6.461500 -3.33142  3.142090 -12.198700  3.91248
## 2  2.549500  4.7613800 -3.256860  4.54163  2.767550   1.899950 -3.89990
## 3 -1.951570  0.0337128  6.954090  5.37897  0.262643  -2.775460  8.30044
## 4  0.219423 -1.0728000  6.479310  4.85522 -2.139290  -0.349628  4.27176
## 5 10.069900  2.1485000  0.896402 -4.57293  2.335340  -2.821520 -4.37676
## 6 -0.524228 -0.5470630  2.378640  2.67134 -0.211221   1.420820  1.73797
##       PC466      PC467      PC468     PC469      PC470     PC471    PC472
## 1  3.628020 -4.0244400 -8.1711400  0.310744 -0.8074060  2.912020 -2.53329
## 2  0.418647 10.4508000 -1.0979000 -1.964110 -0.8672260 -4.281280  0.76241
## 3  6.141740 -5.1453900  8.6116400 -5.823570  3.9905600  0.883324  2.46051
## 4 -2.623500 -6.0081400  2.5242500  7.108670 -3.1207300 -4.661080 -1.30211
## 5 -1.183000  0.0101678 -0.0661312 -1.290870  5.4439800 -6.449400  2.27396
## 6  1.824960 -1.2615100 -0.2402270  1.131610  0.0380535 -0.192693  1.20366
##      PC473      PC474    PC475    PC476     PC477    PC478     PC479     PC480
## 1 1.013180  1.6745700  2.96890 -5.48375 -6.103410 -4.61101 -2.322810  0.412452
## 2 2.673770  0.0147722 -4.50127  2.04992  3.102240  2.99153  7.120000  0.830942
## 3 3.358100 -3.4148800  1.81465  3.03073 -0.987833 -2.96006  0.594713  0.195196
## 4 1.966060 -3.9964500 -2.79979 -1.95266  0.727856  1.33037 -2.553200  5.371020
## 5 4.215990  1.6974900 -2.43417 -1.75712  1.000220  7.04984 -0.121104  0.675001
## 6 0.760275  0.5023750 -1.60718  2.58132 -1.260870 -1.52995  2.888590 -0.886037
##       PC481     PC482      PC483      PC484     PC485     PC486     PC487
## 1  3.554580 -1.461740  0.0914599 -3.2171800  0.655798  3.843490  2.011310
## 2  7.372670 -5.650640  3.4037300 -0.0880227  4.858760  3.624130 -1.305790
## 3 -2.255540  3.775280  1.3882900 -1.7515100 -0.668557 -3.536550 -1.459290
## 4  1.432980  5.346770 -2.6040100  1.6998900 -1.660010  6.209710 -0.313448
## 5  0.450003 -2.554510 -0.2082930 -1.0672200  5.025500  0.353257  0.968105
## 6  3.231290 -0.392056  1.5970900 -0.1756650 -1.125020  1.700380  0.606852
##       PC488    PC489    PC490     PC491     PC492      PC493     PC494
## 1  3.802490 -4.42576 -2.72845  7.621920 -4.325580 -0.3197550  3.667250
## 2 -0.273779  4.25418 -2.62627 -8.714900  2.932970  0.0685562  0.559071
## 3  1.703190  2.56347  1.85937 -0.324487  1.980840  1.2350700 -6.113290
## 4 -1.095370 -2.46304 -2.46801  2.913660 -3.140380  0.4899490 -5.079530
## 5 -2.027000  1.03381  3.38786 -5.432900  1.385940 -2.4002500 -3.183000
## 6 -0.680082  2.57602 -1.26008 -2.528290  0.896173  0.3928170 -1.163320
##       PC495      PC496    PC497     PC498     PC499     PC500     PC501
## 1  2.152860  5.7872700 -6.75241  4.888950 10.529200  0.598650  5.330280
## 2 -0.160883 -2.1484100  4.78026 -5.771790 -3.471150  0.939116 -2.290020
## 3  6.756230 -0.0277912  1.17952  6.194580 -0.993496  0.656219  4.909960
## 4  4.118470 -1.4353600  2.49865  3.763830 -2.638930  1.726980 -4.746170
## 5 -4.630090 -0.3568730 -1.52424  0.184014  7.265410  5.768670 -0.799854
## 6 -1.031990  0.3377750  2.11723  1.698640  2.682820 -1.489850  2.085970
##       PC502    PC503     PC504     PC505     PC506     PC507    PC508     PC509
## 1  1.147590 -6.18349  2.441420  2.589680  0.945649 -0.938806  5.70242 -1.472610
## 2  0.584227 -3.77973 -0.269226  5.746340 -2.132790 -6.135870 -3.78667 -1.808490
## 3 -1.617680  3.19440 -0.131290 -3.426400  2.990390 -1.383630 -1.41255 -1.592690
## 4 -2.249930  6.06621 -5.229590 -3.874520 -0.932163 -4.240970 -1.71753  5.178590
## 5  4.279080 -2.11966 -0.039880  2.324150 -6.198930  0.201677 -1.85836  3.175350
## 6 -0.180856  2.28419  0.209238 -0.384053  0.195751  1.556930  1.70532 -0.573656
##      PC510     PC511    PC512     PC513    PC514    PC515    PC516     PC517
## 1  2.49245 -0.170717 -0.65352 -1.421020 -3.11407 -1.79039 -2.01058  0.322580
## 2 -4.53870  6.674060  5.31818  5.727520 -3.67606 -0.36751 -3.44218 -1.265880
## 3 -1.75669 -0.131597 -2.17060 -2.524800 -2.97285 -2.59630  3.07011  3.589120
## 4  7.61641 -0.945926 -7.50184 -0.244245  1.73440 -6.08824  2.38231  1.054690
## 5  1.51179 -4.933460  1.81896  5.225040  3.30415  3.05736  1.43495 -2.695320
## 6  1.69116 -2.429450  1.22643  0.605065 -0.58304  1.19657  1.36024 -0.223375
##       PC518     PC519     PC520      PC521    PC522    PC523     PC524
## 1 -0.563165 -2.154070 -0.845365 -0.6483500  1.83221  2.88531 -1.725740
## 2  0.301268 -3.229540  0.873406  1.7891300 -5.46300  7.75819  2.325860
## 3 -0.767775 -0.220158 -4.583340 -3.5858600 -1.04207 -5.68721 -0.233143
## 4  1.791160  1.734350 -2.313860  1.5824700  3.74753 -1.16347  0.521510
## 5 -1.746250 -1.497070  0.906723  2.1055100 -2.84490 -2.15630  0.213521
## 6 -1.645620 -0.315717 -1.094820 -0.0148446 -1.29688  2.53103  0.295172
##        PC525     PC526     PC527     PC528      PC529    PC530    PC531
## 1 -2.1186300  1.580550 -0.393534 -3.340450  0.7839050 -4.66696 -3.14469
## 2 -0.0507085  0.426393 -0.383365 -4.232440  2.2558000  3.93817  2.57636
## 3 -1.5023100 -1.351400 -0.664369  0.516294 -0.9451710  3.92352 -1.64968
## 4  0.7228330 -5.733700  7.168400  4.020570 -3.8497600  2.84411  1.62315
## 5 -1.2113900  5.624170 -4.654230  1.694480 -1.3925100  1.42230 -2.14384
## 6  0.0801718 -0.170995  0.501421  0.400535 -0.0388899 -0.12602 -0.49322
##       PC532     PC533     PC534      PC535     PC536     PC537     PC538
## 1  3.405020 -1.023090  6.971360   3.084210  0.583564 -1.490980 -5.154550
## 2 -6.307960  1.694060 -6.513580  -1.381010 -7.136960 -2.431020 -2.209950
## 3  0.736933 -0.884715 -6.036790 -11.649600  7.088760  2.491950  3.449040
## 4 -1.267600 -1.954300 -1.052460   3.190740  1.285960 -0.221289  0.805661
## 5  2.102560 -1.143910  0.266658  -2.888100  2.042110 -0.211161  0.818581
## 6 -0.181220  1.334190 -1.171840   0.729099 -0.210794  0.856780 -0.167919
##       PC539     PC540     PC541      PC542     PC543       PC544     PC545
## 1 -2.207470  0.758238 -3.335490  5.4590000  1.221180  1.63972000  4.427280
## 2 -0.911507 -1.389160  3.149200 -3.8821200 -3.431940 -6.08617000 -4.603970
## 3  3.543010 -1.516680  1.850980 -1.5179400 -0.407301  1.66806000 -7.206650
## 4 -2.818540 -1.144380 -2.341230 -7.4721300 -2.596550  2.33664000 -3.276970
## 5 -1.459910 -2.492320  1.155440  0.0787301  3.107540 -0.00789298  0.538279
## 6  0.139411  0.726248  0.900952  2.1527200 -1.238480  0.32846000  0.776762
##       PC546     PC547     PC548     PC549    PC550      PC551     PC552
## 1 -0.592321 -3.950480  5.398870 -0.259369 1.346980 -3.0258500  2.398070
## 2  0.868614 -0.776384 -3.824660  2.338640 0.631417 -1.0236800  3.124520
## 3  4.242580  2.912100 -2.691830 -5.502290 1.836520 -1.5376700 -7.697860
## 4  0.600199  4.452940 -1.611770 -0.782176 1.684540  0.0141803 -2.459820
## 5 -0.293807  3.026430 -6.600290 -1.351960 0.712053  0.9871010 -0.721727
## 6  2.176100 -0.281714 -0.211135  1.887900 0.134043 -1.3467200 -1.413170
##       PC553      PC554     PC555     PC556       PC557     PC558     PC559
## 1 -0.286456 -0.0663779  3.476500 -1.351740 -1.64359000 -0.778570 -1.306260
## 2  1.589360 -1.7513600 -3.728780 -0.730756 -0.00663453 -0.484378  1.384330
## 3 -7.170830  2.0107000 -9.569970  3.974570  3.56960000  8.585820  1.739970
## 4 -6.107070  1.8487100  2.812830  3.488180  0.66171100 -1.753540 -2.556800
## 5 -0.694254  0.4203220 -6.285140  1.427040  2.70552000  0.324602 -0.267143
## 6 -1.441920 -0.3393800 -0.212959  1.152500 -0.77473400  0.450112 -1.014180
##       PC560      PC561    PC562    PC563     PC564     PC565     PC566
## 1  2.825240  0.0995965  1.57620 -3.60296  0.670159 -3.209690  1.550600
## 2 -2.489070 -4.0515200 -3.70281  6.60162 -2.458920  6.003580 -7.124020
## 3 -1.748750  2.9037700  2.62248 -6.01691  1.100880 -4.439560  5.372880
## 4 -4.708210  2.8882200 -1.32227  2.67467  2.624540  0.743727  2.569070
## 5  0.653901 -0.5110950  1.18163 -2.79985 -1.797680  0.198578 -3.013150
## 6 -1.833120 -1.2206000 -1.99551  1.19657  0.685755  0.234363 -0.634971
##       PC567     PC568     PC569    PC570     PC571    PC572      PC573
## 1  0.989093 -0.459252  0.975044  2.73825  1.134910 -2.30975  0.2222010
## 2 -0.929648 -3.040190  0.822756 -1.69767 -1.093300 -2.08528 -0.0283054
## 3  1.996950  6.582050  0.692111 -2.92630  1.409960  6.58744 -1.6235900
## 4  0.980353 -3.243020  5.102860 -2.73461  0.253183  3.93572 -0.1543540
## 5 -1.986100  4.835600 -1.979180 -2.03141  0.057754 -4.35120 -0.0373671
## 6 -0.800821 -0.265115 -1.446920 -1.63397 -0.565600  2.08011 -0.6737900
##       PC574     PC575     PC576    PC577        PC578     PC579     PC580
## 1  1.188210  0.870558 -2.113010  1.37526  0.000623485  2.240530 -0.459305
## 2 -3.579570  3.242020  0.369332 -3.67347 -2.681540000 -3.973060  0.369044
## 3  0.950265 -0.902675 -3.054820  1.31367  2.586150000  0.692345  1.026450
## 4  2.626630 -0.128662  6.096140  1.26551  0.796442000 -0.817054 -1.224270
## 5 -0.530347 -0.572138 -1.720590  1.32734 -0.445303000  1.693790  0.569703
## 6 -0.331061 -0.566017  0.365110 -0.71432 -0.226785000  0.743208  1.396390
##       PC581     PC582     PC583     PC584     PC585     PC586     PC587
## 1  0.900178 -0.975244  1.351650 -0.764878  0.411443 -3.608420  0.816712
## 2  0.230988 -0.649956 -1.625020  3.628170 -3.325320  1.478650  1.625470
## 3 -3.870950  3.023630  2.915330 -1.153350  2.061790  0.483450 -5.288950
## 4 -0.117613  1.489090  2.486810 -0.658991 -0.150000 -1.361030 -2.080040
## 5  2.659190  2.698140  0.107061  0.738584  0.576776  2.061340 -0.933789
## 6 -0.741153 -0.478477 -1.091250  0.432916  0.536083 -0.676711 -0.147207
##        PC588     PC589     PC590     PC591      PC592      PC593     PC594
## 1  0.0384876 -2.219890 -0.408084 -1.612930 -1.4117400 -0.2201720  1.179640
## 2  2.5240200  1.151330 -0.354449 -0.381923 -0.5873530  0.6151750  1.015280
## 3 -7.6461100  0.354336  2.357310  5.889990  0.2842930  1.6530400 -0.253811
## 4 -1.0604400 -0.817802  2.861370  4.752060 -1.3637300 -0.4553330  0.113574
## 5 -0.3504120 -1.044770 -0.397141  0.717417 -1.2142500  0.0818215  0.682315
## 6 -0.7484500  0.578017  1.178260  1.426280 -0.0826511 -1.7031100 -0.406109
##        PC595     PC596     PC597      PC598     PC599      PC600      PC601
## 1 -0.0430501 -0.664985 -0.157422  0.0947256  2.419170  0.6925310  0.0561172
## 2 -2.9241400  1.393060  1.208010 -0.6697120 -1.009420  0.6709490 -1.2857100
## 3  3.9703000  2.300020 -3.190190  1.5798000  0.306442 -3.5776800 -0.9908110
## 4 -0.7069660 -2.730620 -1.374650  0.4564930 -1.200290 -1.1788900  1.8492300
## 5  1.2877000 -2.035230 -1.534000 -0.2660950  1.271710 -0.0783648 -3.0542800
## 6  2.1880700  1.462530 -1.806420  0.0631933 -0.662056  1.1882700 -0.9711610
##       PC602      PC603      PC604      PC605     PC606     PC607      PC608
## 1 -0.422698  1.2217300  1.1282100 -0.0968164  0.883596 -1.851110  0.2777940
## 2  2.119190 -0.2993870  1.1771100  3.2232000  1.750800 -0.367609 -0.0249427
## 3 -4.576820 -0.8257750  1.2293200 -3.1137200 -0.268914  3.724350 -1.6254500
## 4  0.316105 -0.5618170 -0.1933880 -0.6756260  0.588500  1.240880  0.6906870
## 5 -2.893920 -0.8409080  0.0943487 -1.0693300  0.974841 -0.101900 -1.5808500
## 6  0.217015 -0.0314555  1.2089000 -0.0546657 -0.414050 -1.478620  0.2828550
##        PC609     PC610      PC611      PC612      PC613     PC614      PC615
## 1  0.9498830  0.482229  0.0376125  0.1797420  1.8884100 -1.172590  0.0356375
## 2  0.1552650 -0.364791 -1.1793100 -0.9907910 -0.4088200  0.901477  0.1978340
## 3 -1.4347400  2.588180  2.9292400 -0.8670590 -0.0932232  1.349220 -1.0828100
## 4  0.0549793 -0.170540 -1.0536100  0.1080440 -2.1936100  3.458960  1.8271800
## 5 -0.8816780  0.193019  0.1477220  0.0441914  1.2076800  0.291053 -0.6286800
## 6 -1.0314900  1.159970 -0.5925440  0.4074700 -0.9531180 -0.382572 -0.0261295
##        PC616      PC617     PC618     PC619     PC620     PC621     PC622
## 1 -1.2742200 -1.0166500  1.017700  0.413055 -0.615057  0.933675  0.561719
## 2  0.0792599 -0.2762350  0.377416 -1.354380 -1.201900 -0.260239 -0.613003
## 3  0.9567240  1.8708200  1.603800  2.505120  0.409465 -1.091240 -1.182830
## 4  0.7979200  1.3687000 -2.398750  2.090690 -1.684470  0.237746  1.055300
## 5  0.0248111  0.7346960 -0.984834 -0.284761  0.104881 -0.605211  1.480840
## 6  1.2182000 -0.0674913 -0.194666 -0.839569 -0.573405  0.038825  0.137123
##        PC623     PC624      PC625     PC626     PC627     PC628      PC629
## 1  0.1222350 -1.655880 -1.0240800 -0.398773 -0.372284  0.479921 -0.0538361
## 2  0.7523290  1.770930  0.9077600 -0.342363 -1.178260 -1.118970 -0.8612290
## 3  0.4812560  0.754075 -1.5656300  0.225430  0.780495  0.447120 -0.1822280
## 4  1.0049600 -0.483412  1.6803900 -0.427996  0.511488  0.817202 -0.3105760
## 5  0.9491250  0.715926 -0.3017880  1.305980  0.541594  1.282800  0.2009910
## 6 -0.0150364 -1.275430 -0.0555473 -0.887926  0.135793  0.881540  1.3025200
##        PC630     PC631     PC632      PC633      PC634      PC635      PC636
## 1 -0.6730140  1.329650  0.654511 -0.4903520 -0.3914520 -0.3719210 -0.9230830
## 2  0.0548119 -1.407510 -0.869820  1.1010700 -1.0294800  0.5386230  0.5480640
## 3  0.5820030  0.347856  0.200176 -0.3495600  1.3099300 -0.0265796 -0.5817040
## 4 -0.1259030 -1.563200 -0.721731  0.1302550  0.6358970  0.2288740 -1.1053800
## 5  0.5597680  0.776038 -0.203955  1.5193500 -0.0816816 -1.5026400  0.0457431
## 6  0.5599610 -0.573395  0.917824 -0.0270427  1.3210200  0.6686640 -2.0031600
##       PC637      PC638     PC639     PC640     PC641     PC642      PC643
## 1 -0.545903 -0.2611980  0.663981  0.148466 -1.352060  0.534462 -0.0320718
## 2  1.160670 -0.6517060 -1.015110 -0.414513  0.627800 -0.762995  0.5611680
## 3 -0.377351  1.2528800  1.005600 -0.662310  0.233966 -0.474998  0.5785380
## 4 -0.285090 -0.0676641 -0.187437 -2.525190  0.409769 -1.207560 -0.3914100
## 5  1.077120 -0.3668680 -0.679117  0.429045  0.409687  0.160206  0.1937180
## 6 -0.929517 -1.1370400 -1.731310  0.106299 -2.042490  0.202262 -1.0385800
##       PC644      PC645     PC646     PC647      PC648      PC649      PC650
## 1  0.695318 -0.5732730  1.419190 -0.376348  0.7357110 -0.8545110 -0.3704340
## 2  0.425374 -0.7022600 -0.923579  0.105626  0.2854380 -0.2521310  0.9802470
## 3 -0.291940  0.6470010 -0.476133 -0.762751 -1.1635700 -0.0828889 -0.0932712
## 4  1.453290  0.0768286 -1.272050 -0.611429  0.4130000  0.2926600 -0.2423210
## 5  0.215671 -0.0764362  0.256408  0.724395  0.0163036 -1.2054800  0.2619800
## 6 -0.760264 -2.0082000  2.892230  2.303850 -2.2543100 -0.8165350 -0.6895360
##         PC651     PC652     PC653     PC654      PC655     PC656      PC657
## 1 -0.29158400 -0.956123 -0.972026 -0.877605 -1.1494000  0.829165 -0.0530470
## 2 -0.00710967  1.328450 -1.379670  0.160955  0.2285950 -0.466127  0.0432664
## 3  0.50577700 -0.142748  0.781087  0.590896  1.3880000 -0.124747  0.3939730
## 4  0.89601900  0.405259  0.029616  0.808107  0.0401254 -0.356069 -0.0146783
## 5 -0.26726200 -0.468042  0.183892  1.217820  0.2050710 -0.709634 -0.9518690
## 6  1.56279000 -1.084490 -1.147930 -3.845710  3.3993000  3.065770  1.6460700
##         PC658      PC659      PC660      PC661     PC662     PC663      PC664
## 1  0.00773703 -0.5788180  0.4602070  0.0772628 -0.302533 -0.457385 -0.0492801
## 2  0.58495000  0.6189400 -0.1186510  0.5637610 -0.520939  0.256424 -1.5277700
## 3  0.23331200 -0.3534350  0.6300210  0.1764910  0.155248 -0.325883  0.2595370
## 4 -0.17203300 -0.0365733 -0.1475710 -0.2947840  0.147316 -0.172945  0.7324470
## 5 -0.61314100  0.0015057 -0.1592740  0.3222340  0.147858  0.313532  0.1500250
## 6  1.63719000  1.7095000  0.0199371 -3.7314300  8.424690 -6.823410  0.6166360
##        PC665     PC666      PC667     PC668      PC669      PC670       PC671
## 1 -0.0482858 -1.071640   0.307679  0.517415  -0.247526 -0.2724220   0.0570314
## 2  0.4070540  0.978048   0.505680  0.441660   1.310400  0.2838230   0.3236680
## 3  0.1722620  0.860721   0.116949  0.344689   0.648392  0.0017265   0.1882100
## 4  0.0770676 -0.702335  -0.113037 -0.185621  -0.649254  0.1925390   0.0629604
## 5 -0.1966550 -0.104446   0.168875  0.373712   0.356984  0.2672210  -0.2675250
## 6  2.2520500 -9.838060 -10.543700 -2.177630 -18.570900 -4.9561400 -10.5035000
##        PC672      PC673      PC674      PC675      PC676      PC677       PC678
## 1  0.3658390 -0.2220490 -0.4542430 -0.1059320  0.0668257 -0.1227860 -0.07169060
## 2 -0.3572560  0.3813790 -0.0584039  0.3682240 -0.0438910 -0.2437420 -0.62563900
## 3  0.0525116  1.1491300 -0.0821967  0.0657877 -0.3136410  1.0217800 -0.12046900
## 4  0.4529030  0.1608160  0.0197362 -0.6737840 -0.4331220  0.0589937  0.28747700
## 5 -0.3019860  0.1766060  0.2178910  0.6849810  0.3028740 -0.8538600 -0.00102422
## 6  1.3322600  0.0338846 -2.9810000 -1.6761600  1.4657700 -5.6600100  1.48465000
##        PC679      PC680      PC681      PC682      PC683      PC684      PC685
## 1 -0.1012020  0.0386973  0.2632050  0.1172300  0.3892970  0.0890863  0.3201930
## 2 -0.1120630 -0.0705258 -0.0886319 -0.1414360  0.0814943 -0.0653639  0.1095060
## 3  0.0830344  0.2721590 -0.3359110 -0.3258950 -0.0744264 -0.1506270 -0.1918520
## 4 -0.1225620  0.1521610 -0.1445910  0.0501605 -0.2175600 -0.0702993 -0.0326227
## 5  0.3900610 -0.3917050 -0.4784030  0.1387840 -0.2413210 -0.1031130  0.1312310
## 6 -0.3688850  0.3400610  0.4379240  0.9850670 -0.5071560 -0.6654800 -0.7090970
##        PC686      PC687        PC688 Individual Pop_City Country Latitude
## 1  0.0385472  0.1949580 -6.61228e-07        801   Durres Albania 41.29704
## 2 -0.2263910 -0.0350247 -6.61228e-07        802   Durres Albania 41.29704
## 3  0.3104160 -0.3268830 -6.61228e-07        803   Durres Albania 41.29704
## 4 -0.1771030  0.0535150 -6.61228e-07        804   Durres Albania 41.29704
## 5  0.0731578  0.0622075 -6.61228e-07        805   Durres Albania 41.29704
## 6  0.6414640  0.0648584 -6.61228e-07        806   Durres Albania 41.29704
##   Longitude Continent Year          Region   Subregion order order2 orderold
## 1  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 2  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 3  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 4  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 5  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 6  19.50373    Europe 2018 Southern Europe East Europe    33     25       25

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_r0.01_colors_pc1_pc2.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 & 3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_r0.01_colors_pc1_pc3.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

3.4 Run LEA for SNP Set 1 (r2<0.01)

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:30,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/snps_sets/r2_0.01.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","lea_cross_entropy_euro_global_r01.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         5      5      5      5      5
## without cross-entropy      0      0      0      0      0
## total                      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9741476 0.9478415 0.9371094 0.9306857 0.9274846 0.9252483 0.9230460
## mean 0.9744821 0.9482057 0.9373050 0.9308620 0.9287969 0.9258323 0.9234635
## max  0.9747943 0.9484720 0.9374630 0.9310598 0.9292955 0.9268456 0.9238938
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.9210405 0.9194040 0.9178541 0.9161954 0.9157153 0.9143805 0.9134463
## mean 0.9216164 0.9197455 0.9185914 0.9166938 0.9160336 0.9148880 0.9137159
## max  0.9220569 0.9200141 0.9191938 0.9177154 0.9167792 0.9154233 0.9140798
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.9130960 0.9125347 0.9122265 0.9114823 0.9112280 0.9108308 0.9105371
## mean 0.9133830 0.9129528 0.9124338 0.9118073 0.9117729 0.9114267 0.9109761
## max  0.9138815 0.9136672 0.9128994 0.9121939 0.9123163 0.9122307 0.9115720
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.9103878 0.9102019 0.9103990 0.9106880 0.9104110 0.9109080 0.9106481
## mean 0.9108562 0.9110041 0.9108721 0.9109012 0.9111984 0.9110827 0.9113020
## max  0.9114802 0.9114443 0.9113352 0.9111545 0.9122130 0.9113573 0.9119994
##         K = 29    K = 30
## min  0.9109183 0.9104377
## mean 0.9113406 0.9115078
## max  0.9116957 0.9122739
# get the cross-entropy of all runs for K = 20
ce = cross.entropy(project, K = 20)
ce #run 1 is best for k=20
##          K = 20
## run 1 0.9108308
## run 2 0.9122307
## run 3 0.9114574
## run 4 0.9113974
## run 5 0.9112171
#run 4 is best for k=7 & k=12

4. Plots for LEA SNP Set 1 (r2<0.01)

4.1 Plots for K=20

color_palette_20 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#77DD77",
    "#1E90FF",
    "#B22222",
    "#AE8333",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "#008080", 
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "green",
    "purple",
    "orangered"
  )

Default plot

# select the best run for chosen K 
best = which.min(cross.entropy(project, K = 20))
# best is run 1

barchart(project, K = 20, run = best,
        border = NA, space = 0,
        col = color_palette_20,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

4.1.1 Mean admixture by country for K=20

using ggplot

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 20, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:20)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_20)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k20_r01.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

4.1.2 Plot individual admixtures for K=20

Extract ancestry coefficients for k=20

change to correct matrix

leak20 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.snmf/K20/run1/r2_0.01_r1.20.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak20)
## # A tibble: 6 × 20
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.0138    0.000100  0.0918     2.83e-3 1.37e-2 2.21e-2 1.39e-2 5.12e-3 3.41e-2
## 2 0.00808   0.0383    0.0118     7.02e-2 4.34e-2 2.00e-2 3.24e-3 4.22e-2 3.28e-2
## 3 0.00231   0.0000998 0.0000998  9.98e-5 9.98e-5 8.77e-4 9.98e-5 9.98e-5 9.98e-5
## 4 0.0000999 0.0000999 0.0000999  3.33e-2 3.03e-2 9.99e-5 3.04e-2 9.99e-5 9.99e-5
## 5 0.00413   0.0000999 0.0000999  3.63e-2 3.15e-2 9.99e-5 9.99e-5 9.99e-5 1.05e-2
## 6 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 11 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak20 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak20)

head(leak20)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 1.38063e-02 9.99550e-05 9.18430e-02 2.82863e-03 1.37334e-02
## 2 1002 OKI 8.07866e-03 3.83354e-02 1.18183e-02 7.02198e-02 4.33844e-02
## 3 1003 OKI 2.31431e-03 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.99010e-05 3.32538e-02 3.02918e-02
## 5 1005 OKI 4.12997e-03 9.99010e-05 9.99010e-05 3.62772e-02 3.14710e-02
## 6 1006 OKI 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##            X6          X7          X8          X9         X10      X11
## 1 2.20642e-02 1.38697e-02 5.11567e-03 3.40829e-02 9.99550e-05 0.527942
## 2 1.99984e-02 3.24241e-03 4.22411e-02 3.27885e-02 4.31895e-02 0.324555
## 3 8.77439e-04 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 0.995111
## 4 9.99010e-05 3.03931e-02 9.99010e-05 9.99010e-05 9.99010e-05 0.818280
## 5 9.99010e-05 9.99010e-05 9.99010e-05 1.05491e-02 9.99010e-05 0.874483
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 0.998103
##           X12         X13         X14         X15         X16         X17
## 1 6.55197e-02 3.95934e-02 9.99550e-05 2.97411e-02 9.61370e-03 9.99550e-05
## 2 1.24613e-01 2.85697e-02 1.27235e-01 9.99730e-05 1.24154e-02 9.99730e-05
## 3 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 2.45144e-02 3.72545e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 2.76394e-03 1.00913e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##           X18         X19         X20
## 1 9.99550e-05 5.70084e-02 7.27377e-02
## 2 9.99730e-05 4.37211e-02 2.52939e-02
## 3 9.98470e-05 9.98470e-05 9.98470e-05
## 4 9.09715e-03 1.28061e-02 3.01026e-03
## 5 1.32808e-02 9.99010e-05 1.58546e-02
## 6 9.98289e-05 9.98289e-05 9.98289e-05

Rename the columns

# Rename the columns starting from the third one
leak20 <- leak20 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak20)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 1.38063e-02 9.99550e-05 9.18430e-02 2.82863e-03 1.37334e-02
## 2 1002 OKI 8.07866e-03 3.83354e-02 1.18183e-02 7.02198e-02 4.33844e-02
## 3 1003 OKI 2.31431e-03 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.99010e-05 3.32538e-02 3.02918e-02
## 5 1005 OKI 4.12997e-03 9.99010e-05 9.99010e-05 3.62772e-02 3.14710e-02
## 6 1006 OKI 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##            v6          v7          v8          v9         v10      v11
## 1 2.20642e-02 1.38697e-02 5.11567e-03 3.40829e-02 9.99550e-05 0.527942
## 2 1.99984e-02 3.24241e-03 4.22411e-02 3.27885e-02 4.31895e-02 0.324555
## 3 8.77439e-04 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 0.995111
## 4 9.99010e-05 3.03931e-02 9.99010e-05 9.99010e-05 9.99010e-05 0.818280
## 5 9.99010e-05 9.99010e-05 9.99010e-05 1.05491e-02 9.99010e-05 0.874483
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 0.998103
##           v12         v13         v14         v15         v16         v17
## 1 6.55197e-02 3.95934e-02 9.99550e-05 2.97411e-02 9.61370e-03 9.99550e-05
## 2 1.24613e-01 2.85697e-02 1.27235e-01 9.99730e-05 1.24154e-02 9.99730e-05
## 3 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 2.45144e-02 3.72545e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 2.76394e-03 1.00913e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
##           v18         v19         v20
## 1 9.99550e-05 5.70084e-02 7.27377e-02
## 2 9.99730e-05 4.37211e-02 2.52939e-02
## 3 9.98470e-05 9.98470e-05 9.98470e-05
## 4 9.09715e-03 1.28061e-02 3.01026e-03
## 5 1.32808e-02 9.99010e-05 1.58546e-02
## 6 9.98289e-05 9.98289e-05 9.98289e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak20 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

color_palette_20 <-
  c(
    "#B20CC9",
    "#1E90FF",
    "purple",
    "#FF8C1A",
    "#AE8333",
    "#B22222",
    "yellow2",
    "#F49AC2",
    "orangered",
    "magenta", 
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "#008080",
    "green4",
    "navy",
    "green",
    "#77DD77",
    "blue"
  )

# Generate all potential variable names
all_variables <- paste0("v", 1:20)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=20.\n LEA inference for k20 with 19,318 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_20) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=20_euro_global_r2_01_run1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

4.2 Plots for K=7

Default plot for K=7

# select the best run for chosen K 
best = which.min(cross.entropy(project, K = 7))
# best is run 1

barchart(project, K = 7, run = best,
        border = NA, space = 0,
        col = color_palette_20,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

4.2.1 Mean admixture by country for k=7

using ggplot

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:7)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_20)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k7_r01.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

4.2.2 Plot individual admixtures for K=7

leak7 <- read_delim(
  here("euro_global/output/snps_sets/r2_0.01.snmf/K7/run4/r2_0.01_r4.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 

head(leak7)
## # A tibble: 6 × 7
##       X1     X2       X3       X4    X5      X6    X7
##    <dbl>  <dbl>    <dbl>    <dbl> <dbl>   <dbl> <dbl>
## 1 0.0978 0.0358 0.0341   0.0636   0.549 0.0292  0.191
## 2 0.0615 0.0957 0.0593   0.0430   0.528 0.0461  0.167
## 3 0.0690 0.0360 0.000100 0.000100 0.696 0.0671  0.132
## 4 0.0299 0.102  0.0296   0.000100 0.639 0.00764 0.192
## 5 0.0654 0.0784 0.000100 0.000100 0.663 0.0105  0.183
## 6 0.0441 0.0539 0.000100 0.000100 0.705 0.0506  0.146

The fam file

fam_file <- here(
  "euro_global/output/snps_sets/r2_0.01.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop        X1        X2          X3          X4       X5         X6
## 1 1001 OKI 0.0978103 0.0357826 0.034129800 0.063596800 0.548841 0.02919780
## 2 1002 OKI 0.0615106 0.0957283 0.059314700 0.042975600 0.527767 0.04608960
## 3 1003 OKI 0.0690331 0.0360201 0.000099982 0.000099982 0.696111 0.06705730
## 4 1004 OKI 0.0298846 0.1018460 0.029630700 0.000099991 0.638919 0.00763714
## 5 1005 OKI 0.0654267 0.0784264 0.000099982 0.000099982 0.662766 0.01046610
## 6 1006 OKI 0.0440915 0.0538924 0.000099982 0.000099982 0.704794 0.05057510
##         X7
## 1 0.190641
## 2 0.166615
## 3 0.131579
## 4 0.191983
## 5 0.182715
## 6 0.146447

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop        v1        v2          v3          v4       v5         v6
## 1 1001 OKI 0.0978103 0.0357826 0.034129800 0.063596800 0.548841 0.02919780
## 2 1002 OKI 0.0615106 0.0957283 0.059314700 0.042975600 0.527767 0.04608960
## 3 1003 OKI 0.0690331 0.0360201 0.000099982 0.000099982 0.696111 0.06705730
## 4 1004 OKI 0.0298846 0.1018460 0.029630700 0.000099991 0.638919 0.00763714
## 5 1005 OKI 0.0654267 0.0784264 0.000099982 0.000099982 0.662766 0.01046610
## 6 1006 OKI 0.0440915 0.0538924 0.000099982 0.000099982 0.704794 0.05057510
##         v7
## 1 0.190641
## 2 0.166615
## 3 0.131579
## 4 0.191983
## 5 0.182715
## 6 0.146447

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

Using ggplot2 for individual admixtures

source(
  here("scripts", "RMarkdowns", 
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 


color_palette_7 <-
  c(
    "#00FF00",
    "#FFFF00",
    "#FF0000",
    "#0000FF",
    "#FF00FF",
    "orange",
    "purple"
     )


# Generate all potential variable names
all_variables <- paste0("v", 1:7)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 10
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for k7 with 19,318 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_7) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=7_euro_global_r2_01_run4.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

4.3 Plots for K=12

4.3.1 Mean admixture by country for K=12

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 12, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:12)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_20)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k12_r01.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

4.3.2 Plot individual admixture for K=12

leak12 <- read_delim(
  here("euro_global/output/snps_sets/r2_0.01.snmf/K12/run4/r2_0.01_r4.12.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 

head(leak12)
## # A tibble: 6 × 12
##         X1      X2      X3      X4      X5      X6    X7      X8      X9     X10
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1  4.02e-2 1.15e-2 1.06e-2 1.00e-4 2.31e-2 6.51e-2 0.619 1.10e-1 3.78e-3 5.29e-2
## 2  7.60e-2 2.78e-2 5.41e-2 1.00e-4 1.50e-2 5.90e-2 0.499 1.03e-1 3.09e-2 8.21e-2
## 3  9.99e-5 8.91e-3 9.99e-5 6.68e-3 1.82e-3 3.18e-3 0.979 9.99e-5 2.54e-4 9.99e-5
## 4  1.42e-2 2.87e-2 2.45e-2 9.99e-5 9.99e-5 6.56e-3 0.884 9.99e-5 9.99e-5 9.99e-5
## 5  1.80e-2 2.77e-2 5.17e-3 2.40e-2 1.00e-4 2.92e-4 0.896 1.00e-4 1.00e-4 1.00e-4
## 6  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5
## # ℹ 2 more variables: X11 <dbl>, X12 <dbl>

The fam file

fam_file <- here(
  "euro_global/output/snps_sets/r2_0.01.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak12 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak12)

head(leak12)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 0.040199900 0.011530500 0.010638500 0.000099982 0.023146800
## 2 1002 OKI 0.075992800 0.027784300 0.054107900 0.000099991 0.014999300
## 3 1003 OKI 0.000099946 0.008908350 0.000099946 0.006684460 0.001824840
## 4 1004 OKI 0.014216000 0.028685700 0.024504700 0.000099946 0.000099946
## 5 1005 OKI 0.018042700 0.027702800 0.005169910 0.024000300 0.000099964
## 6 1006 OKI 0.000099901 0.000099901 0.000099901 0.000099901 0.000099901
##            X6       X7          X8          X9         X10         X11
## 1 0.065050400 0.618872 0.109740000 0.003775240 0.052941900 0.063905500
## 2 0.059005000 0.498837 0.103242000 0.030899000 0.082053100 0.029348300
## 3 0.003175130 0.978554 0.000099946 0.000254015 0.000099946 0.000099946
## 4 0.006562450 0.883594 0.000099946 0.000099946 0.000099946 0.000099946
## 5 0.000292443 0.896372 0.000099964 0.000099964 0.000099964 0.027566900
## 6 0.000099901 0.998901 0.000099901 0.000099901 0.000099901 0.000099901
##           X12
## 1 0.000099982
## 2 0.023630600
## 3 0.000099946
## 4 0.041837800
## 5 0.000453241
## 6 0.000099901

Rename the columns

# Rename the columns starting from the third one
leak12 <- leak12 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak12)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 0.040199900 0.011530500 0.010638500 0.000099982 0.023146800
## 2 1002 OKI 0.075992800 0.027784300 0.054107900 0.000099991 0.014999300
## 3 1003 OKI 0.000099946 0.008908350 0.000099946 0.006684460 0.001824840
## 4 1004 OKI 0.014216000 0.028685700 0.024504700 0.000099946 0.000099946
## 5 1005 OKI 0.018042700 0.027702800 0.005169910 0.024000300 0.000099964
## 6 1006 OKI 0.000099901 0.000099901 0.000099901 0.000099901 0.000099901
##            v6       v7          v8          v9         v10         v11
## 1 0.065050400 0.618872 0.109740000 0.003775240 0.052941900 0.063905500
## 2 0.059005000 0.498837 0.103242000 0.030899000 0.082053100 0.029348300
## 3 0.003175130 0.978554 0.000099946 0.000254015 0.000099946 0.000099946
## 4 0.006562450 0.883594 0.000099946 0.000099946 0.000099946 0.000099946
## 5 0.000292443 0.896372 0.000099964 0.000099964 0.000099964 0.027566900
## 6 0.000099901 0.998901 0.000099901 0.000099901 0.000099901 0.000099901
##           v12
## 1 0.000099982
## 2 0.023630600
## 3 0.000099946
## 4 0.041837800
## 5 0.000453241
## 6 0.000099901

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

Using ggplot2 for individual admixtures

source(
  here("scripts", "RMarkdowns", 
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak12 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 


color_palette_12 <-
  c(
    "#FF8C1A",
    "#1E90FF",
    "#B22222",
    "green",
    "yellow2",
    "blue",
    "#75FAFF",
    "magenta",
    "green4",
    "navy", 
    "purple",
    "orangered"
  )


# Generate all potential variable names
all_variables <- paste0("v", 1:12)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_12[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 10
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=12.\n LEA inference for k12 with 19,318 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_12) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=12_euro_global_r2_01_run4.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

4.4 Plots for K=5

Try plotting k=5 (See if the 5 original Asian clusters hold)

Extract ancestry coefficients for k=5

Summary of project

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         5      5      5      5      5
## without cross-entropy      0      0      0      0      0
## total                      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9741476 0.9478415 0.9371094 0.9306857 0.9274846 0.9252483 0.9230460
## mean 0.9744821 0.9482057 0.9373050 0.9308620 0.9287969 0.9258323 0.9234635
## max  0.9747943 0.9484720 0.9374630 0.9310598 0.9292955 0.9268456 0.9238938
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.9210405 0.9194040 0.9178541 0.9161954 0.9157153 0.9143805 0.9134463
## mean 0.9216164 0.9197455 0.9185914 0.9166938 0.9160336 0.9148880 0.9137159
## max  0.9220569 0.9200141 0.9191938 0.9177154 0.9167792 0.9154233 0.9140798
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.9130960 0.9125347 0.9122265 0.9114823 0.9112280 0.9108308 0.9105371
## mean 0.9133830 0.9129528 0.9124338 0.9118073 0.9117729 0.9114267 0.9109761
## max  0.9138815 0.9136672 0.9128994 0.9121939 0.9123163 0.9122307 0.9115720
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.9103878 0.9102019 0.9103990 0.9106880 0.9104110 0.9109080 0.9106481
## mean 0.9108562 0.9110041 0.9108721 0.9109012 0.9111984 0.9110827 0.9113020
## max  0.9114802 0.9114443 0.9113352 0.9111545 0.9122130 0.9113573 0.9119994
##         K = 29    K = 30
## min  0.9109183 0.9104377
## mean 0.9113406 0.9115078
## max  0.9116957 0.9122739
# get the cross-entropy of all runs for K = 5
ce = cross.entropy(project, K = 5)
ce #run 3 is best for k=5
##           K = 5
## run 1 0.9289309
## run 2 0.9292691
## run 3 0.9274846
## run 4 0.9290042
## run 5 0.9292955

choose best run here

leak5 <- read_delim(
  here("euro_global/output/snps_sets/r2_0.01.snmf/K5/run3/r2_0.01_r3.5.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 

head(leak5)
## # A tibble: 6 × 5
##      X1    X2    X3    X4      X5
##   <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 0.201 0.128 0.212 0.420 0.0405 
## 2 0.200 0.156 0.168 0.405 0.0716 
## 3 0.162 0.148 0.172 0.478 0.0391 
## 4 0.234 0.100 0.207 0.444 0.0150 
## 5 0.197 0.124 0.214 0.462 0.00281
## 6 0.202 0.146 0.198 0.438 0.0151

The fam file

fam_file <- here(
  "euro_global/output/snps_sets/r2_0.01.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak5 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak5)

head(leak5)
##    ind pop       X1        X2       X3       X4         X5
## 1 1001 OKI 0.200653 0.1276200 0.211680 0.419503 0.04054400
## 2 1002 OKI 0.200195 0.1560630 0.167533 0.404649 0.07155990
## 3 1003 OKI 0.162190 0.1481010 0.172349 0.478278 0.03908180
## 4 1004 OKI 0.233547 0.0999538 0.207488 0.444058 0.01495440
## 5 1005 OKI 0.196883 0.1244250 0.214223 0.461663 0.00280657
## 6 1006 OKI 0.202352 0.1460070 0.198391 0.438121 0.01512950

Rename the columns

# Rename the columns starting from the third one
leak5 <- leak5 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak5)
##    ind pop       v1        v2       v3       v4         v5
## 1 1001 OKI 0.200653 0.1276200 0.211680 0.419503 0.04054400
## 2 1002 OKI 0.200195 0.1560630 0.167533 0.404649 0.07155990
## 3 1003 OKI 0.162190 0.1481010 0.172349 0.478278 0.03908180
## 4 1004 OKI 0.233547 0.0999538 0.207488 0.444058 0.01495440
## 5 1005 OKI 0.196883 0.1244250 0.214223 0.461663 0.00280657
## 6 1006 OKI 0.202352 0.1460070 0.198391 0.438121 0.01512950

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

Default Plot

color_palette_5 <-
  c(
    "#FF8C1A",
    "#77DD37",
    "#1E90FF",
    "purple3",
    "#FFFF19"
     )

# select the best run for K = 15 clusters
best = which.min(cross.entropy(project, K = 5))
# best is run 4

barchart(project, K = 5, run = best,
        border = NA, space = 0,
        col = color_palette_5,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

4.4.1 Mean admixture by country for K=5

using ggplot

color_palette_5 <-
  c(
    "#FF8C1A",
    "#77DD37",
    "#1E90FF",
    "purple3",
    "#FFFF19"
  )

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 5, run = best))


# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns", 
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:5)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_5[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=5.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) +  # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_5)

#save the plot             
ggsave(
  here("scripts", "RMarkdowns", 
    "output", "euro_global", "lea", "LEA_bycountry_k=5_euro_global_r01.pdf"
   ),
   width  = 12,
   height = 6,
   units  = "in",
   device = cairo_pdf
 )

4.4.2 Plot individual admixtures for K=5

source(
  here("scripts", "RMarkdowns", 
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak5 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 


color_palette_5 <-
  c(
    "#FF8C1A",
    "#FFFF19", 
    "#1E90FF",
    "purple3",
    "#77DD37"
     )


# Generate all potential variable names
all_variables <- paste0("v", 1:5)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_5[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 10
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k5 with 19,318 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_5) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=5_euro_global_r2_01_run3.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

4.5 Plots for K=16

for r2<0.01 (Set 1)

Summary of project check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy         5      5      5      5      5
## without cross-entropy      0      0      0      0      0
## total                      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9741476 0.9478415 0.9371094 0.9306857 0.9274846 0.9252483 0.9230460
## mean 0.9744821 0.9482057 0.9373050 0.9308620 0.9287969 0.9258323 0.9234635
## max  0.9747943 0.9484720 0.9374630 0.9310598 0.9292955 0.9268456 0.9238938
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.9210405 0.9194040 0.9178541 0.9161954 0.9157153 0.9143805 0.9134463
## mean 0.9216164 0.9197455 0.9185914 0.9166938 0.9160336 0.9148880 0.9137159
## max  0.9220569 0.9200141 0.9191938 0.9177154 0.9167792 0.9154233 0.9140798
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.9130960 0.9125347 0.9122265 0.9114823 0.9112280 0.9108308 0.9105371
## mean 0.9133830 0.9129528 0.9124338 0.9118073 0.9117729 0.9114267 0.9109761
## max  0.9138815 0.9136672 0.9128994 0.9121939 0.9123163 0.9122307 0.9115720
##         K = 22    K = 23    K = 24    K = 25    K = 26    K = 27    K = 28
## min  0.9103878 0.9102019 0.9103990 0.9106880 0.9104110 0.9109080 0.9106481
## mean 0.9108562 0.9110041 0.9108721 0.9109012 0.9111984 0.9110827 0.9113020
## max  0.9114802 0.9114443 0.9113352 0.9111545 0.9122130 0.9113573 0.9119994
##         K = 29    K = 30
## min  0.9109183 0.9104377
## mean 0.9113406 0.9115078
## max  0.9116957 0.9122739
# get the cross-entropy of all runs for K = 16
ce = cross.entropy(project, K = 16)
ce #run 3 is best for k=
##          K = 16
## run 1 0.9128210
## run 2 0.9136672
## run 3 0.9125347
## run 4 0.9126670
## run 5 0.9130741
color_palette_16 <-
  c(
    "#FF8C1A",
    "#B20CC9",
    "#77DD77",
    "#1E90FF",
    "#B22222",
    "green",
    "yellow2",
    "#F49AC2",
    "blue",
    "#FFFF99",
    "#75FAFF",
    "magenta",
    "green4",
    "navy", 
    "purple",
    "orangered"
  )

4.5.1 Mean admixture by country for K=16

using ggplot

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 16, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:20)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_16[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=16.") +
 #scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_16)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k16_r01.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

4.5.2 Plot individual admixtures for K=16

Extract ancestry coefficients for k=16 change to correct matrix

leak16 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.snmf/K16/run3/r2_0.01_r3.16.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak16)
## # A tibble: 6 × 16
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.00675   0.0401    0.000100   1.10e-1 1.11e-1 1.00e-4 2.20e-2 5.08e-2 6.08e-2
## 2 0.105     0.0349    0.0467     5.91e-2 1.87e-1 1.00e-4 4.83e-2 1.00e-4 4.28e-2
## 3 0.0000999 0.0000999 0.0000999  9.99e-5 9.99e-5 3.85e-3 9.99e-5 9.99e-5 9.99e-5
## 4 0.00139   0.0000999 0.0145     9.36e-3 3.30e-2 9.99e-5 9.99e-5 4.90e-2 9.99e-5
## 5 0.0228    0.0000999 0.0237     1.43e-2 1.61e-2 9.99e-5 9.99e-5 3.87e-2 9.99e-5
## 6 0.0000999 0.0000999 0.0000999  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 7 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak16 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak16)

head(leak16)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 6.75139e-03 4.01424e-02 9.99550e-05 1.10261e-01 1.11171e-01
## 2 1002 OKI 1.05351e-01 3.48736e-02 4.66519e-02 5.91137e-02 1.87114e-01
## 3 1003 OKI 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 1004 OKI 1.39301e-03 9.99190e-05 1.44613e-02 9.36419e-03 3.30173e-02
## 5 1005 OKI 2.27584e-02 9.99190e-05 2.37252e-02 1.42976e-02 1.60814e-02
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
##            X6          X7          X8          X9         X10         X11
## 1 9.99550e-05 2.20087e-02 5.07991e-02 6.08446e-02 9.99550e-05 9.99550e-05
## 2 9.99730e-05 4.82618e-02 9.99730e-05 4.28018e-02 9.99730e-05 6.02545e-03
## 3 3.84553e-03 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 9.99190e-05 9.99190e-05 4.89646e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 9.99190e-05 3.86597e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
##           X12      X13         X14         X15         X16
## 1 3.53490e-02 0.487606 4.00217e-02 3.45453e-02 9.99550e-05
## 2 1.00045e-02 0.314417 5.70276e-02 3.85740e-02 4.94835e-02
## 3 9.98915e-05 0.993476 1.37964e-03 9.98915e-05 9.98915e-05
## 4 2.66116e-02 0.865289 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 0.881810 1.76843e-03 9.99190e-05 9.99190e-05
## 6 9.98648e-05 0.998502 9.98648e-05 9.98648e-05 9.98648e-05

Rename the columns

# Rename the columns starting from the third one
leak16 <- leak16 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak16)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 6.75139e-03 4.01424e-02 9.99550e-05 1.10261e-01 1.11171e-01
## 2 1002 OKI 1.05351e-01 3.48736e-02 4.66519e-02 5.91137e-02 1.87114e-01
## 3 1003 OKI 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 1004 OKI 1.39301e-03 9.99190e-05 1.44613e-02 9.36419e-03 3.30173e-02
## 5 1005 OKI 2.27584e-02 9.99190e-05 2.37252e-02 1.42976e-02 1.60814e-02
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
##            v6          v7          v8          v9         v10         v11
## 1 9.99550e-05 2.20087e-02 5.07991e-02 6.08446e-02 9.99550e-05 9.99550e-05
## 2 9.99730e-05 4.82618e-02 9.99730e-05 4.28018e-02 9.99730e-05 6.02545e-03
## 3 3.84553e-03 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 9.99190e-05 9.99190e-05 4.89646e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 9.99190e-05 3.86597e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
##           v12      v13         v14         v15         v16
## 1 3.53490e-02 0.487606 4.00217e-02 3.45453e-02 9.99550e-05
## 2 1.00045e-02 0.314417 5.70276e-02 3.85740e-02 4.94835e-02
## 3 9.98915e-05 0.993476 1.37964e-03 9.98915e-05 9.98915e-05
## 4 2.66116e-02 0.865289 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 0.881810 1.76843e-03 9.99190e-05 9.99190e-05
## 6 9.98648e-05 0.998502 9.98648e-05 9.98648e-05 9.98648e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak16 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

color_palette_16 <-
  c(
    "#FF8C1A",
    "#B20CC9",
    "#F49AC2",
    "#1E90FF",
    "#B22222",
    "purple",
    "yellow2",
    "orangered",
    "magenta",
    "#FFFF99",
    "#75FAFF",
    "blue",
    "green4",
    "navy", 
    "green",
    "#77DD77"
  )

# Generate all potential variable names
all_variables <- paste0("v", 1:16)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_16[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=16.\n LEA inference for k16 with 18,318 SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_16) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "lea_k=16_euro_global_r2_01_run3.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5. LEA for SNP Set 3 (r<0.01, MAF>1%) for Europe_global dataset

5.1 PCA for SNP Set 3

5.1.1 Import the data for SNP Set 3

genotype <- here(
   "euro_global/output/snps_sets/r2_0.01_b.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22642
##   column count: 697
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22642
##   Character matrix gt cols: 697
##   skip: 0
##   nrows: 22642
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22642
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS GEL GES GRA GRC GRV 
##  10  12  12  10  12  12  12  13  10  12  14  12  12  16  12   2  12  11  10  12 
## HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG KAN KAT KER KLP KRA KUN LAM 
##  12   4   7  12   4  11   4   5   9  12   2   6  12  11   6  12   4  12   4   9 
## MAL MAT OKI PAL POL POP QNC RAR REC ROM ROS SER SEV SIC SLO SOC SON SPB SPC SPM 
##  12  12  12  11   2  12  11  12  11   4  11   4  12   9  12  12   3   8   6   5 
## SPS SSK STS SUF SUU TAI TIK TIR TRE TUA TUH UTS YUN 
##   8  12  12   6   6   7  12   4  12   9  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   688
##  - number of detected loci:      22642
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   688
##  - number of detected loci:      22642
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.removed file, for more informations.
## 
## 
##  - number of detected individuals:   688
##  - number of detected loci:      22642
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.lfmm"

PCA for SNP Set 3 (MAF 1% r2<0.01 snp set)

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          688
##         -L (number of loci)                 22642
##         -K (number of principal components) 688
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/ 
## pca result directory:            r2_0.01_b.pca/ 
## input file:                      r2_0.01_b.lfmm 
## eigenvalue file:                 r2_0.01_b.eigenvalues 
## eigenvector file:                r2_0.01_b.eigenvectors 
## standard deviation file:         r2_0.01_b.sdev 
## projection file:                 r2_0.01_b.projections 
## pcaProject file:                   r2_0.01_b.pcaProject 
## number of individuals:           688 
## number of loci:                  22642 
## number of principal components:  688 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          688
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8, xlim = c(0, 30))

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Check R symbols for plot

#to see all shapes -> plot shapes 
N = 100; M = 1000
good.shapes = c(1:25,35:38,43,60,62:64)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
  scale_shape_manual(values=good.shapes[1:N]) +
  geom_point()
## Warning: Removed 640 rows containing missing values or values outside the scale range
## (`geom_point()`).

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 73

Check the regions

unique(sampling_loc$Region)
##  [1] "North America"   "South America"   "Western Europe"  "Southern Europe"
##  [5] "Eastern Europe"  "East Asia"       "South Asia"      "Southeast Asia" 
##  [9] "West Africa"     "East Africa"     "Indian Ocean"    "Central Africa" 
## [13] "North Africa"    "Caribbean"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1     PC2     PC3      PC4      PC5      PC6       PC7
## 1        ALD 11.8985 3.85889 20.9680 1.880910 -33.6894 -6.76218 -4.533020
## 2        ALD 12.2322 4.18342 22.9889 1.947130 -37.7399 -7.03191 -3.158920
## 3        ALD 12.2253 2.88544 28.1043 1.912940 -28.8482 -8.83110 -6.905940
## 4        ALD 13.1452 3.48505 24.9198 1.685130 -34.7655 -4.18402 -1.369600
## 5        ALD 10.8278 5.66856 19.7776 0.404653 -29.8086 -4.80145  0.773310
## 6        ALD 10.4331 3.11243 24.7304 0.543678 -36.5059 -8.89831 -0.281582
##        PC8       PC9      PC10      PC11      PC12     PC13       PC14
## 1  8.98646  1.541180 -1.403160 -1.538210 -2.495950  2.47575  0.2189350
## 2 10.44180 -1.248800 -0.310105 -6.370290  2.657040  6.69481  0.0872054
## 3 12.47650 -2.779840 -2.797700 -6.614790 -2.228320  6.91340 -2.2488800
## 4  8.17536 -4.582430 -1.645110  0.443441  3.101420  1.47234  3.4897500
## 5  6.85580 -2.406870 -2.271780 -1.680240 -1.212820 -3.66094 -1.3134100
## 6  9.35066  0.164892 -1.311790 -6.877400 -0.186574  4.76815 -2.8549600
##         PC15      PC16      PC17      PC18      PC19     PC20      PC21
## 1 -0.7433950 -4.102250 -0.994354  1.553110 -1.304180  4.21418 -2.694100
## 2 -2.8777600 -1.860030 -4.347590  3.031520 -2.212580  8.05775  0.212026
## 3 -0.0648291 -2.618360 -4.343090  3.619210 -5.826280  3.74276  0.939447
## 4 -2.5178000  0.439961  0.672891 -0.396581 -0.544247 -3.22350 -0.838874
## 5  1.4977000 -3.805660 -2.805890  5.297080  3.246310  3.56786 -0.352491
## 6 -2.6450000 -2.608550 -1.146910  2.348040 -1.728110  3.43092 -1.194770
##        PC22      PC23        PC24      PC25     PC26       PC27      PC28
## 1 -7.171720  2.801470 -4.56413000 -1.700610 4.766860  -6.759340 -2.940520
## 2 -6.795390  0.934749 -0.98090100 -0.346114 4.846600  -6.659470 -3.202760
## 3 -2.548400  0.555746 -0.00772089 -0.574792 5.259230  -9.362370 -6.661650
## 4  0.861181 -1.694410  0.85624600  3.704020 5.871270   0.743232  0.746248
## 5 -6.540370  7.734690  1.40291000 -4.953990 0.562202 -12.439700 -3.259680
## 6 -5.894430  1.711360  0.80065100 -1.622210 6.891260  -5.290660 -0.827538
##       PC29     PC30      PC31      PC32      PC33    PC34      PC35     PC36
## 1 -7.38806 -1.93947  -7.68627  2.212990  -4.07782 5.66995 -2.279700 -1.99917
## 2 -3.39402 -1.81596  -9.67717 -0.447789  -4.24724 4.00922 -0.250749 -3.78005
## 3 -4.38048 -2.27692 -10.22660 -2.834660  -8.62314 2.31297 -2.826010 -2.10932
## 4 -3.02354 -1.68289   4.77615 -1.466760   2.27006 4.55003  1.537870  3.14186
## 5  7.22971 11.39920   4.69439 -2.691630 -11.42350 1.43781 -0.472601 -1.71645
## 6 -9.46076  3.04788  -5.13932  0.808606  -3.42825 3.32513 -1.893570 -2.58513
##        PC37     PC38     PC39     PC40      PC41       PC42      PC43     PC44
## 1 -10.99070  2.35826 6.281100 -4.23144   1.23823  0.0577522  6.207560 15.32210
## 2  -7.36251 -1.94576 6.414360 -5.36919   6.66074 -1.9747400  4.776720  8.01157
## 3 -11.83400 -2.60205 5.419670 -6.63548   9.67688 -0.1473240  2.141570  2.20297
## 4   3.67647 -7.31058 7.933170 -1.97308   6.95744 -2.2662100 -0.564778  4.27435
## 5  -9.44355 -1.53910 3.798990 -1.14780  -5.47507  2.2477700  8.635720  8.40626
## 6  -6.63073 12.76110 0.744103  9.13753 -12.24120  7.4629600 -5.337030 10.15090
##        PC45       PC46      PC47     PC48      PC49     PC50      PC51
## 1 -0.934501  3.8569000  -5.34155 -9.12502 -11.25280  1.78636 -11.74710
## 2  1.980790  5.0410600  -5.55942 -5.54413  -7.89991 -1.50608  -3.46455
## 3  2.263190  1.1788500   1.20225  1.07838  -3.96479 -5.18767  -1.93441
## 4 -5.385930  6.7931100   1.95438  3.49448  -2.68172 -7.88299  -2.05299
## 5  0.116632 -3.7034200  -7.79331 -1.33218  -1.45567  2.42951  -6.36327
## 6  2.781570 -0.0425489 -11.83510  2.74986   1.03119 -0.89795  -1.41032
##        PC52      PC53      PC54     PC55       PC56     PC57      PC58
## 1  2.067140  0.358824 -0.351160  6.45217 -16.561100 -3.41118 -1.277840
## 2  0.545668  0.909372  5.038720  6.27877 -13.832000 -4.87548  2.290730
## 3  2.938710 -7.580600  3.105390  1.23675 -13.720600 -0.57217  0.763471
## 4  1.268040  3.782910 -3.785790  2.78646  -0.279656  1.19117  0.336880
## 5 -0.728252 -1.003880  0.453712  3.13619  -1.738380 -3.28987 -2.215380
## 6 -3.565340  8.202730  6.491160 -3.51222   4.388420 -1.42062 -6.057310
##        PC59      PC60       PC61      PC62       PC63     PC64      PC65
## 1 -0.463471  0.725333 -11.947300 -2.912760 -3.3649600 -6.17048   5.40581
## 2 -3.294070 -1.452100  -5.657040 -3.004870  1.4187100 -1.72455   8.78201
## 3 -4.384280  1.215710  -8.646940 -1.688210  0.0826056 -5.34159   7.48674
## 4 11.793800 -7.436210  -0.645052  3.023270 -4.8547100 -2.61409   2.03267
## 5  5.101360  8.419080  -3.246330  0.501329  0.8120250 -7.35079   1.33704
## 6 -9.717840  8.018080   8.968660  1.053380 23.2322000 14.93660 -11.65460
##        PC66      PC67     PC68     PC69     PC70       PC71       PC72     PC73
## 1 -0.739923  0.890372  4.09764 -2.77532 -3.97646  -4.582900  -7.003680 -3.86519
## 2  2.870150 -0.129829 10.43760 -4.27367 -2.24687 -10.470700  -4.354890 -4.12743
## 3  1.384970  4.868260  6.14693 -8.26014  2.04405 -10.210400  -5.177980 -4.64743
## 4 -1.105010  2.834160  1.06600  7.31397 -2.80450  -9.040710  -0.128026 -2.03907
## 5  9.029660  0.998050  5.45647 -4.52312 -6.74390   0.459956   0.267002 -3.56658
## 6 -4.128160 -5.135860 -1.84614 11.18930  1.96809  13.454400 -16.504800 13.73320
##       PC74       PC75      PC76      PC77      PC78       PC79      PC80
## 1  3.28683   1.205050 -1.113160 -1.938320  4.043630   0.121096 -1.681540
## 2  2.50524   3.168370 -3.061710 -1.441830 -1.249120   4.856710 -4.413080
## 3  6.95597  -0.957647 -3.204570 -4.632810  5.581000   6.026590  2.059280
## 4 -5.26367   7.137910  0.694828  0.473936 -4.461780   6.310620 -0.971351
## 5 -3.34290   1.802250 -2.016620 -4.584130  0.584253   0.868004  7.008150
## 6  3.81356 -14.334100  9.148230  5.380290  6.502990 -19.781300 11.836000
##        PC81       PC82     PC83     PC84     PC85     PC86     PC87      PC88
## 1  6.179290   3.742580 -4.35778  6.06019 -5.37454  1.22585  4.69195  4.470680
## 2  1.751120  -1.717100  1.47410  9.95463 -3.84189 -4.28609  4.28630 -0.937928
## 3  0.389775   2.518990  1.93787 12.74170  1.62216 -4.65288 -4.13230 -2.550920
## 4  9.967140   0.243231  3.59179  4.14183 -2.33266 -4.77638  2.50605 -0.123554
## 5  4.165940   0.227164 -2.47538  3.59877 -3.39338  6.02402  7.51161  2.474580
## 6 -4.336970 -12.575000 -4.96907  1.51878  4.12724 -4.20452 -6.99142  2.720720
##       PC89      PC90       PC91        PC92      PC93      PC94     PC95
## 1 -1.34363  0.118700  0.9844460  -3.8977800 -1.440860 -3.171870 -1.36889
## 2  8.64144  1.893400 -0.0188444  -3.2230900 -0.834429 -2.978750 -2.86645
## 3  7.56701 -0.104621 -2.5311000  -1.4874800  2.528660 -9.868470 -8.62068
## 4  6.04669  4.415650 -2.5067200   1.0201900  5.591420 -8.926990  1.12416
## 5  4.29238  3.584870 -0.4815900   0.0974798  2.470260 -4.932240 -3.70299
## 6 10.87000 -8.036280  5.3530400 -10.7839000  0.459864  0.493365 -4.04304
##       PC96       PC97      PC98      PC99     PC100     PC101    PC102
## 1  6.93165 -1.3037100 -1.093510  -1.70220 -0.483900 -1.492360  2.40173
## 2 -3.67457 -4.8283400 -3.520380  -3.29133  2.981410 -0.496448 -2.66578
## 3 -8.57851 -9.4991500  4.151930  -2.38174  2.335690  0.901063 -5.87972
## 4  2.01164 -0.0282694  0.854353  -0.24969 -0.954521  4.310620  5.00815
## 5 -1.08490  3.6544700 -5.800560 -10.15810  2.489950  4.266330 -5.09592
## 6  1.31753 -1.5546500 -2.443900   2.37766  0.527705 -0.870260 -1.04518
##       PC103     PC104    PC105     PC106     PC107     PC108    PC109     PC110
## 1 -0.389261  1.286790 -3.80797  -2.26695  6.514070 -1.009370 -2.17540  3.457810
## 2  0.418618 -3.667640  3.40287  -1.25286 -5.356330  1.504250  5.77917  0.186282
## 3  3.277350  6.517350  2.36292   3.33355  1.736070 -2.700070  2.25668  4.306890
## 4 -2.873090 -0.185915  5.52946  -3.48252 -0.367124 -0.559500  6.59497 -8.278920
## 5  1.027970  0.879890  5.40572 -10.98460 -2.640520  0.281598 -5.78467 -4.027110
## 6 -5.646480 -6.122190 -8.47112  -2.55660  3.617870  2.021160  4.64364 12.412100
##       PC111    PC112     PC113    PC114     PC115     PC116     PC117
## 1 -1.446900  1.70651 -4.894200 -2.38792  5.122720 -4.276130  1.267230
## 2  0.717849 -0.89245 -0.593490 -3.11769  3.662380 -1.304200 -0.724111
## 3  3.134630 -1.72248  0.676184 -6.33110 -0.101371 -4.427790 -2.827220
## 4 11.258200  2.17220  7.036340  5.80008  3.340340 -0.770597  6.637560
## 5 -4.182230  6.40994 -2.749260 -4.25393  3.341230 -0.176298  3.536200
## 6  3.168250  6.47919 -1.530630  5.83320 -1.042500  5.055740 -8.840110
##        PC118    PC119    PC120    PC121    PC122     PC123      PC124     PC125
## 1 -1.6685300 -5.32720  1.95853 -4.73370  1.11994 -3.510790 -4.4559100  9.836470
## 2  0.0492695  3.31784 -2.97760 -3.64527  1.25323  0.341979 -2.5663600  3.728240
## 3  0.1905510  5.23757 -1.28177 -5.22730 -7.47138 -5.865210  0.0191948  6.365040
## 4 -1.6458000  4.00927 13.41000 -3.03806  2.83135  1.046140 -3.7309300 -8.959210
## 5  1.6369400 -6.29942 -1.28463 -3.73613 -9.37480 12.623800 -4.9531700 -2.636770
## 6 -7.6600000  4.91161  1.88427  5.14815  3.24284  4.011890 -2.8030300 -0.942569
##       PC126     PC127     PC128     PC129     PC130     PC131     PC132
## 1  3.781900  1.183500  3.079410 -1.237430 -4.000670  5.461280  7.111460
## 2  5.924740  0.209126  5.628260 -7.010920  0.750376 -0.320736 -1.211470
## 3  3.593720 -3.151720  7.903910 -3.167900  4.254680 -4.720320 -0.379222
## 4  1.947630 -1.728130 -8.076620 -1.096440 -8.423640  4.872620  1.470880
## 5 -0.825903 -2.270400 -0.464037  0.900414 -0.346241  4.837860  0.724787
## 6  4.431420 -1.218760  1.719750 -5.397170 -2.682860  0.760937  0.784101
##       PC133      PC134    PC135    PC136     PC137    PC138     PC139     PC140
## 1 -2.378120   1.687690 2.673780  4.04975 -0.145723 -3.46940 -8.072330 -0.611995
## 2 -0.583977   0.995366 0.278118  3.57282 -0.822583 -0.99227 -2.947450  0.852558
## 3  0.763785   3.016570 3.195090  4.71458 -7.164000 -3.95266 -2.295240 -0.183394
## 4  2.799490   0.135762 9.490730  3.58000  5.946930  5.79806 -3.517150 -0.600069
## 5  3.567450 -13.004800 4.827770 -1.05043 -0.542921  6.19495  0.825143  0.472428
## 6 -7.777590  -7.670570 1.062020 -1.22783  2.489490 -3.46471 -5.596910  1.897120
##       PC141     PC142     PC143     PC144     PC145     PC146     PC147
## 1 -0.997202 -5.236530 -0.870772 -9.329990  5.277620  0.264430  -2.14896
## 2 -8.905070 -2.311090  1.837050  3.201020  5.881510 -3.452580   5.51274
## 3 -8.576190 -5.757500 12.182000 -0.539536  7.198210 -4.657740   8.32013
## 4 -3.239030 -1.043220  3.544470 -0.830649  0.498468 -0.213647   3.50364
## 5 -1.520360  0.582979 -0.701247  1.623020  6.551760  5.747600 -11.01850
## 6  2.936710  1.779840 -3.898050 -7.489730 -0.700444 -3.800990   5.85017
##       PC148     PC149     PC150     PC151     PC152     PC153      PC154
## 1  0.313684 -7.849820 -3.866890  4.655750 -2.564390  1.627360 -3.9310000
## 2  1.709620  0.613923  2.784340  3.309270 -4.325070  0.419344 -6.3652400
## 3  6.773820  3.983930  3.300730  0.210383 -3.214640  4.895100 -1.8233800
## 4  4.757610  5.654400  4.296540 -0.827081 -3.770600  7.321870  0.0186542
## 5 -4.566200 -2.901290  0.600091  2.021960  6.230730 -3.753150  3.7881900
## 6  4.899100 -2.498930 -9.219470  1.232410  0.962651 -8.125360  0.5357390
##      PC155    PC156     PC157     PC158    PC159     PC160     PC161     PC162
## 1  3.13810 -1.46355 -3.013630 -6.622600  4.31151  2.046280  2.416730 -4.229680
## 2 -2.84686 -4.29443 -0.734930 -2.664550  9.99006 -1.788660 -0.388667  2.097670
## 3 -3.06354  1.15015  1.453980  9.361540  4.96861 -7.196140  5.069530  0.666307
## 4  4.18548  0.73210 -1.040920 -0.409835 -8.12447 -1.700210 -0.505560 -2.381160
## 5 -4.96705 -5.56457  3.627220 -1.292500  4.38248  4.740250 12.842600  9.023710
## 6 -2.77699 -2.17923  0.531267 -5.878390 -5.08949  0.238323 -1.981010 -0.318513
##       PC163    PC164     PC165    PC166    PC167     PC168    PC169    PC170
## 1 -0.977354 -1.57235  3.576590 -3.04232 -3.39439 -0.963023  6.37370  2.38532
## 2 -3.551370 -1.75907  5.701560 -5.98100 -6.55855  6.582750  1.76178 -1.73615
## 3 -3.243210 -2.37397  7.271910 -3.47200  4.19099 -3.045100  1.47464 -6.04968
## 4  4.256880  6.37976 -8.568080  6.55969 -6.54069 -6.252820  5.29830  1.80133
## 5  0.400542 -3.65314  0.254957  2.00497 -1.70544 -0.565616 -1.28996  6.96619
## 6 -4.076200 -1.91306 -5.453440  4.05089  3.90265 -1.436990 -2.79383  5.89461
##       PC171     PC172     PC173     PC174     PC175    PC176      PC177
## 1 -0.141260 -1.753580 -3.201160 -4.371390 -2.918110 -3.97661 -0.0998708
## 2 -8.414750 -0.693887  3.124380  1.726010  0.771151 -5.48584 -3.2261900
## 3 -4.390430 -2.527000  1.202060  5.838940 -2.815140 -1.78273  4.4973900
## 4 -5.234380 -3.848410 -3.252540 -2.116090 -1.929810 -4.26861 -2.6883700
## 5  1.294890 -8.476360 -0.429583 -4.852190 -3.054290  6.06653 -2.7902400
## 6  0.357824 -0.787292  2.563700 -0.379031  2.943810  6.72160 -3.6346900
##      PC178      PC179    PC180     PC181    PC182     PC183     PC184     PC185
## 1  1.11397  2.7705000 -6.41632 -0.718047  5.40606  2.768710 -0.157675 -5.296250
## 2 -2.04173 -0.1632810 -3.30494  3.162210 -1.66111  2.798700  2.504030 -2.528250
## 3 -1.59255  2.1579500 -2.50266 -8.285570 -6.36530 -0.365336  7.896620  6.849820
## 4  8.76007 -0.0967904  5.35715  5.651510  3.03202  8.365110 -1.639480 -6.466690
## 5 -2.19353  4.2507800  4.67608 -4.351820 -9.37398 -9.092460 -2.733110 -0.165382
## 6  3.75966  1.2323200 -3.29620 -2.309620  2.50132  0.352197 -1.674230 -3.168450
##      PC186     PC187     PC188     PC189    PC190     PC191     PC192     PC193
## 1 -5.29530  -1.32898  4.422620 -4.347050 -1.17324  5.215930  7.408460 -1.102280
## 2 -8.42934  -1.90825 -4.061890  2.266310 -3.85701  0.404375  0.915842 -5.226510
## 3 -1.17040   2.62380 -2.048570 -0.624122  2.77890 -2.452040 -3.036270 -4.019250
## 4  4.52913 -11.06170  0.650079 -5.651790  6.30209  8.814130  6.935000  5.774390
## 5 -1.27104  -8.09140  0.322796 -4.247770 -7.71188 -5.393390  2.957960 -0.873157
## 6 -4.10243   2.79037  3.951170 -3.347500 -2.11222 -2.412620 -0.794744 -0.164616
##        PC194    PC195     PC196    PC197    PC198     PC199     PC200     PC201
## 1  5.6279800 -8.54265 -4.436000  6.32859 -2.01467 -2.518600 -4.600950 -0.305185
## 2 -0.0337481  2.07801 -3.018280  1.35562  5.01774  0.498107  3.113090  4.860630
## 3 -3.6605100 -1.75431  2.724360 -2.22400  6.42381  0.175597  0.512386 -0.780544
## 4  3.5217200 -3.43567  0.648697  2.14305 -2.21421  3.045700  4.651460 -7.773500
## 5 -4.1743400  2.78024  2.247300 -2.46370  6.27945 -0.584857 -7.693870 -2.855090
## 6  0.0126222 -2.12899  3.167800 -1.31376 -1.27225  5.049300  3.290740 -1.016910
##       PC202     PC203     PC204     PC205    PC206    PC207     PC208
## 1  0.619995  0.511851 -3.566140  6.564960  1.05245  2.95804  1.171880
## 2 -2.242940  2.427600 -0.462441  5.246700  4.93263  2.08983 -4.565700
## 3 -3.012950 -0.943221  4.925950  2.369390 -3.13122  2.86687 -0.600310
## 4 -5.168800 -0.166476 -3.775910  0.124918  5.39915 -2.07519 -0.379023
## 5 -3.661670  5.837700  0.821897  5.528850 -1.26431  3.37289 -1.628840
## 6 -4.271770  1.435020 -0.816374 -0.938444  5.35043  2.90376  1.994700
##        PC209     PC210     PC211     PC212     PC213     PC214    PC215
## 1  3.0637500  0.326561 -7.263850 -3.866530  7.072110 -2.182620  4.69728
## 2  1.3800900  1.887270  0.583088 -0.649341  1.104540  2.639440  2.46155
## 3 -2.5983200  0.691781  0.876536  4.073340  1.429400 -1.384510  1.80725
## 4 -6.3484000  0.952962  6.126060 -1.116160 -0.687042 -6.839030 -1.59933
## 5 -1.8068900  7.318490  9.855260 -9.872910 -2.322490 -6.641690 -5.07957
## 6  0.0800038 -1.941610  1.521390 -2.767840 -0.987760 -0.378819  4.29534
##       PC216    PC217       PC218     PC219    PC220    PC221     PC222    PC223
## 1 -2.022560  4.51343  0.20998900  1.653830  8.45475 -3.32061 -0.544521  2.89980
## 2 -0.339576  5.05123  1.97419000  0.887665 -2.10645  2.86436  1.676340  8.68816
## 3  3.648840  2.84461 -0.00618843  0.225685 -2.87660 -3.37039  0.926413 -3.81371
## 4 -4.000240  6.52331  0.73299700 -1.356560  4.28038  5.32275 -0.711682 -1.93726
## 5 -7.584570  9.90239 -8.13707000  6.849920 -1.41234  2.28737  5.204920 -8.19837
## 6  6.970830 -1.15739 -2.42506000 -2.768490 -1.98175  1.06500 -2.492900  3.80023
##       PC224     PC225      PC226       PC227     PC228     PC229     PC230
## 1  1.832910  2.859500  1.6443100 -6.19263000 -7.130470  4.446880 -0.186256
## 2  3.509170  0.689052 -2.5059900 -0.71940900 -1.395450 -6.097530  3.278390
## 3  1.137120 -0.771971  0.0507143 -0.00445608 -1.598960 -1.434190  4.461100
## 4 -3.475090  1.261260 -5.2605700  0.21031600  5.988410 -3.607710 -3.866150
## 5  0.680956  1.012670  4.2677300  7.47408000  7.435590 -4.036900  4.173860
## 6  1.733650 -1.209130 -1.8056300  5.76595000 -0.292876 -0.774136  3.131270
##       PC231     PC232    PC233      PC234     PC235     PC236       PC237
## 1 -8.190690 -1.969220 -5.58511  2.3334900  0.914007 -3.948490  0.39069200
## 2 -2.158590  3.244850 -4.33197  4.2369200 -2.032430  0.388047 -3.91907000
## 3  2.374080  2.620280  2.32979 -2.9930200  0.118947  3.863260  5.97636000
## 4 -0.939006  4.875300 -4.73733 -8.7861700 -0.969056 -6.273040 -7.33774000
## 5  2.218690 -7.296970 -1.19657  0.3920730  4.239490  4.259520  0.00123725
## 6  4.212630  0.198037  5.16208  0.0533057 -0.412084 -3.455080 -5.43434000
##       PC238      PC239     PC240     PC241     PC242    PC243     PC244
## 1 -0.786844  0.7157320  2.112520  2.758120  3.183580  5.79723  0.121639
## 2 -0.676934 -3.7479800 -0.892240  0.625494  3.260310  7.39515  5.013700
## 3  5.734090  0.0970912  1.194330 -0.948146  0.150667 -2.08495 -3.908600
## 4 -1.221010 -1.2565900 -1.376880 -3.889080 -2.109230  5.72671  4.772850
## 5 -2.826820  2.7278800  3.535780 -0.662025 -4.142770  2.35954  3.869920
## 6 -3.027610  3.1014900 -0.977669  1.698270  3.128530  1.90610 -2.604320
##        PC245    PC246    PC247     PC248     PC249     PC250     PC251
## 1  1.5072500  5.00468 -3.50096 -0.104169 -5.228660  2.473540 -1.448730
## 2 -0.0311376  5.11494  8.98308  1.450940  3.224880  1.807720 -1.712260
## 3  1.2898800 -1.01829  1.40151  1.782440  0.469905 -6.430050 -6.003360
## 4 -3.4588500 -5.38979 -4.22454 -0.188380 -2.659040  6.129860 -2.268780
## 5 -2.1593500  2.57015 -8.55004  4.475860  2.570250  1.161970  5.016230
## 6  2.9842100  1.04132 -1.38910 -1.011830  1.720750  0.744594 -0.902192
##       PC252     PC253      PC254     PC255     PC256     PC257     PC258
## 1 -1.298770 -1.931920 -1.1629200  0.339418  7.094020 -1.279290  3.684530
## 2 -4.382710  0.351900  0.0794999 -2.590000 -6.585020 -0.154801 -0.762867
## 3  1.250490 -1.244750 -1.9325400  0.193598 -2.273380  1.204290  0.384443
## 4  4.427770 -0.509626 -5.2930400  5.100560 -0.822354  2.722500  2.415860
## 5 -3.527460 -1.120240 -0.4678880  0.698454  4.737260 -4.762300  0.586104
## 6  0.219882  2.287450 -1.0379600  2.254390 -2.952640 -2.717000  1.096950
##       PC259    PC260       PC261     PC262     PC263     PC264     PC265
## 1  3.349670  1.78552  0.36825300  1.460990  3.061050 -1.322840 -1.104860
## 2 -0.744912  2.41334 -4.10532000 -2.852850  0.048269  0.925379 -1.157720
## 3  1.726140 -3.28961  4.01317000 -1.463210  3.902070  3.992470 -2.276590
## 4 -3.011900  3.14689 -0.00388165  2.195830 -4.845990 -6.451570 -2.239920
## 5 -0.649155 -1.26044 -5.64032000 -0.444088  0.235939  2.850170 -0.175531
## 6  0.554808 -4.13210  1.95626000 -1.652790  0.373481  0.931666 -0.736384
##       PC266     PC267     PC268     PC269     PC270     PC271     PC272
## 1  0.668185  6.702660  0.660727  0.175709  0.283383  7.254500 -4.152170
## 2 -3.503440 -0.348669  2.387070  3.154040  1.737000 -0.168999  5.013310
## 3  3.448800 -0.903781  3.426960 -4.229630  3.132950 -0.174052  0.108654
## 4 -5.209450  0.015039  0.894539 -0.723044 -1.445870 -0.882903  2.763000
## 5 -2.044720 -3.073900  4.607310  1.090490 -1.984880 -0.529451 -3.132650
## 6 -3.013410 -3.695040 -1.634880  0.171090  0.238332 -1.163630 -0.196222
##       PC273    PC274     PC275     PC276     PC277     PC278    PC279     PC280
## 1  1.677970  7.81944  2.110250 -1.567860  2.952540  2.552990  5.61472  0.629756
## 2  7.060520  2.41947  8.498130 -0.146736  0.985365 -2.400750  5.79757  1.194720
## 3  0.432424 -2.51915 -1.744860 -0.786598 -1.582370  1.642210 -1.19837 -0.682976
## 4  1.304410  7.66498 -2.791560 -2.927290 -1.100930 -4.489640 -6.48909  1.840430
## 5 -1.513140 -3.17431  2.320440  5.630840  1.104180 -0.339380  1.65247  1.201840
## 6 -2.368800 -1.69213 -0.798591 -2.842390 -0.233671  0.846307 -1.25258  0.144443
##       PC281     PC282     PC283     PC284    PC285     PC286     PC287
## 1  0.692712 -3.488640 -2.001480 -7.559070  2.66131  1.510070 11.325000
## 2 -6.646920 -2.524960  0.964793  3.108780  6.42244 -4.000590  3.753810
## 3 -3.944820 -2.361430 -1.264450 -0.569989 -9.26104  0.140117 -4.610780
## 4 -0.748424 -0.698996 -0.320397  3.709160  6.70639  0.850139  0.753095
## 5 -0.183655 -0.374259  2.314610  0.549637  5.98890  3.744880 -0.776613
## 6  0.906798 -2.586710 -0.804821 -2.208050 -1.02308 -3.128990  3.222870
##       PC288     PC289      PC290     PC291     PC292    PC293    PC294
## 1  2.404590 -0.748032 -4.7580400 -1.147350 -0.524219 0.250731 -8.51649
## 2 -5.474040  3.367370 -0.5142120  0.026877 -0.619392 3.233640  1.62955
## 3  1.402480 -2.427300  5.4351900  3.741020  0.863480 5.053610  3.72361
## 4 -1.409670 -4.040910 -2.7730500 -0.744712  1.062200 4.419110  4.22575
## 5  4.027320  2.458220 -3.7326400 -5.947370  0.588445 3.251560 -5.64496
## 6 -0.261222 -1.457330  0.0563136 -3.039090  3.337200 2.624540  1.16674
##       PC295    PC296     PC297    PC298     PC299      PC300    PC301
## 1 -3.407200  1.82864  4.358320  3.67117  4.708780  3.9489900 -0.65519
## 2 -7.020300 -1.22006 -1.062970 -5.83735 -0.806433 -1.6187600 -5.00769
## 3 -4.137140 -3.70534 -1.662280 -1.85428  0.792453 -0.0375975  1.28279
## 4 -0.514099 -4.00937  0.995828  1.27443  2.837610  4.4402600  3.95943
## 5  2.241220  9.60890  2.787100 -1.66928  0.782261  6.8586600  2.41582
## 6  0.198664  1.20530  2.248890 -2.76466 -3.363880  0.2810570  0.50078
##        PC302       PC303    PC304     PC305     PC306     PC307     PC308
## 1  0.9491320 -2.54439000  2.94930  0.185465  1.657670 -3.038740 -3.515970
## 2  0.1957950  1.65509000 -1.86220  1.583750 -0.622924 -2.944350  3.634120
## 3 -1.8934600  1.67825000  1.83357  1.307230 -1.681070  0.615403  3.634690
## 4 -2.0623100  0.00725161 -7.64866  1.639910  2.337770  4.502890 -2.156110
## 5 -1.6569400 -0.50527200 -2.96218 -2.025100  7.262400 -3.084190 -2.560640
## 6 -0.0334848  0.40520000  1.69610  1.075850  1.985610 -2.159800 -0.263277
##        PC309     PC310      PC311    PC312     PC313     PC314      PC315
## 1  4.4662600 -7.220310  4.0035500  4.84828 -2.047380  6.459350  2.0530200
## 2 -2.6696100 -2.706070  0.0953163 -8.36116  5.867400 -0.686917 -3.6883100
## 3 -0.1056200  0.740668 -1.1779200  5.50363 -2.451050  1.676270 -2.4761100
## 4 -0.7031060 -2.958080  3.2981200 -2.76237 -4.030790 -4.244030  0.0271444
## 5 -0.2799270 -1.455730 -2.5699200 -0.30904 -1.524430 -2.266380  5.5629200
## 6 -0.0742412  2.285170 -2.0177500 -1.33069  0.114855 -0.937781  0.1677240
##      PC316     PC317      PC318      PC319     PC320    PC321     PC322
## 1  1.60793 -0.928904 -2.8963400 -3.8314300  3.322600 -5.91347  6.957620
## 2  3.40482 -5.136700 -1.5446300  0.0540386  1.899880 -1.29758 -0.866601
## 3 -1.37351  4.842630 -3.3204600 -0.8597600 -4.035920  2.64319  3.593210
## 4 -1.07955 -2.844890 -0.0536269 -0.4826110 -0.696679 -4.43431  0.534557
## 5  2.17793 -1.818010 -3.1465600  1.0611100  4.082690 -1.00184 -2.522460
## 6 -1.25075  0.276142 -0.1193020  0.8655870  0.432312 -1.31602  0.584220
##       PC323     PC324    PC325     PC326     PC327      PC328    PC329
## 1 -2.261220 -1.946790 -1.06225 -2.423140  6.433680 -9.1127800 -3.79751
## 2  1.858560 -0.443009  3.72992 -0.435356  2.911360 -2.9544200  1.53054
## 3 -2.008980  0.507728  0.34785 -2.946460  2.778320 -0.0304957  0.68032
## 4 -3.039660 -1.733570 -5.98391 -4.679300  0.176414  2.8618600 -2.10652
## 5 -1.942430  2.054440 -1.89354 -1.035090  2.462550  6.1113600  1.38743
## 6 -0.584371  0.297172 -2.25775  1.915640 -1.834600  0.7316520 -1.10027
##         PC330     PC331    PC332      PC333     PC334     PC335      PC336
## 1  -0.6697460  2.335020 -9.02480  7.0954300  2.481280  1.416810  5.1544100
## 2   4.9301800  1.395380 -1.58689 -9.4036500 -1.351530  5.762420  0.0203044
## 3 -11.0183000  3.702350  3.29380  4.2603200  1.536270 -0.779746  5.4327500
## 4  10.3303000 -0.949576  1.66993 -1.5769700  0.917715 -1.427320 -4.7079600
## 5   5.2508600 -4.016490  3.35820 -0.0464337  1.840960 -1.538450 -3.0274400
## 6   0.0381149  2.240540  2.30567  1.0953200  1.511930  1.653160 -4.3168700
##       PC337    PC338     PC339      PC340     PC341     PC342      PC343
## 1  0.595132 -2.10366  1.533210  0.4307060  1.221380  1.715490 -4.3898300
## 2  5.754790  1.85071 -6.060160 -4.0798600 -1.418390  0.912188 -0.6976580
## 3  5.779580  5.84280  1.646920 -0.0883349  0.473957 -3.012210 -4.8430400
## 4 -1.015270 -3.17295  0.647781  2.6516200  0.290116 -3.995180 -1.0811500
## 5 -0.468120 -7.29827  3.243030  3.6592200 -4.561820  7.072780  5.8585300
## 6 -1.614560  1.47322 -3.687260  1.6110900  3.520520  0.759446 -0.0980159
##        PC344     PC345     PC346     PC347     PC348     PC349    PC350
## 1 -0.6908760 -2.248950  1.599820 -1.144390  4.664490 -0.390483  5.10194
## 2  0.0941007 -1.346530 -2.941950 -1.900770 -2.139460  1.842290 -0.59382
## 3  0.1265370  0.708169 -3.135280 -1.683880 -1.383580 -1.425310  5.37760
## 4 -1.5620700  0.618369 -0.473878 -0.779087  2.097720 -7.439810 -8.00980
## 5 -3.3422600  0.816991 -3.204700  6.875240  5.791930  1.900770 -5.86972
## 6 -2.7558500  3.541620 -1.532780  1.516360 -0.295384 -1.507070  1.02258
##       PC351     PC352    PC353     PC354    PC355      PC356    PC357    PC358
## 1 -0.185552 -0.782399  2.06013 -1.428750 -4.58035 -2.8537900 -3.39104  5.70659
## 2 -2.909170  0.293619  6.19210  3.683840  5.92079 -6.5299700 -8.58787 -3.98967
## 3  3.892560 -5.713910 -1.95532 -4.528570  1.74027  0.4775880  3.58162 -4.22597
## 4  1.868760 -4.097560 -2.44472  1.130900  2.71606 -5.6525500 -4.07519  3.07901
## 5 -0.088897  1.599600  1.58747  0.302451 -2.95658  3.5443400  6.55612  1.75672
## 6 -1.927820 -1.549190 -3.31106 -1.106010  0.18390 -0.0697238  3.15082 -1.25369
##      PC359     PC360   PC361    PC362     PC363     PC364     PC365      PC366
## 1 -1.19079  3.792470 2.29117 -5.15170  5.281940  8.598590  4.469220 -1.6559300
## 2 -5.87414  0.797655 3.94421 -5.00693  2.852630 -0.487419 -4.192020 -2.1086800
## 3  3.32671  1.534180 0.44207  3.84969 -0.932832 -7.460330  6.197250  0.0573447
## 4 -6.81197 -0.991286 1.18064  1.86411  5.780690  4.180990 -3.234230  1.4324400
## 5 12.19500  2.255990 2.33111  6.89192  0.771399  2.849610 -2.229590  3.2951100
## 6 -2.54109  1.357850 2.33792  1.20592  0.136482 -1.206020  0.220469 -0.5791840
##       PC367     PC368    PC369     PC370      PC371     PC372     PC373
## 1  4.560030 -2.963340  3.28228  8.800210 -6.6371000 -1.743110 -3.347280
## 2  2.604130  5.550430  3.22494  4.077540  7.5722000 -7.964400 -6.674970
## 3 -1.603570  2.750110 -2.21031  1.880560 -1.5570200  3.056490  1.743550
## 4 -0.664225  0.398964 -3.67413  1.844910  2.7492600  2.967250  1.157680
## 5  3.709460  1.158250 -6.95520  0.381659  0.3821940  1.416710 -0.541871
## 6 -2.640120 -0.116021 -1.27714 -1.819890  0.0230128  0.848949 -3.660630
##       PC374     PC375     PC376     PC377     PC378     PC379     PC380
## 1  4.666990  3.093120  0.147127 -5.417010  0.860193 -0.477035  0.304496
## 2 -0.902418  0.703398  1.904310  3.267530  0.169598 -0.765301  5.625600
## 3  4.135700 -5.816000 -4.018550 -0.320740  1.950950  4.306870  0.978982
## 4 -2.588750 -0.293278  1.692370  0.472464 -2.868220  2.649880 -2.855800
## 5 -0.241666  6.016770  2.677910 -3.250850  3.940460  4.165690 -6.079370
## 6 -4.742270 -0.464228 -1.374020  0.798703 -2.586160 -1.565320  1.618960
##       PC381     PC382     PC383     PC384    PC385     PC386     PC387
## 1  2.124450 -0.882423  0.609469 -0.592757 -4.49149  0.610937  1.650770
## 2 -5.022800 -6.476660 -0.841114  0.620252 -5.74996 -1.856400  4.229190
## 3  0.244552 -2.306170  2.842770 -0.376082  4.20329 -3.561100  2.199490
## 4  3.353390 -6.495190 -1.600340  3.912110 -2.87357  5.196310  1.350250
## 5  2.726300  4.342850 -5.076050  3.030210  3.33195  7.579190 -1.662490
## 6  1.859110 -2.037550  0.705680  1.421200  1.46382 -1.939330 -0.520467
##       PC388     PC389     PC390     PC391     PC392    PC393    PC394
## 1 -3.065200  8.044350 -0.640716  5.392230  2.793180  2.56680  3.32342
## 2  3.128960 -0.731143  0.628872 -0.706485  0.870649  7.56201  2.95395
## 3  0.488453  1.868410 -4.262500  3.876070 -3.609700 -3.45799 -3.38279
## 4 -4.697910 -1.395930  2.831990  0.761245  1.596570 -0.53367 -6.84325
## 5  2.026030  1.841950 -2.373110 -4.806120 -6.442250  6.03059  2.24633
## 6  0.451781  0.754478  0.325290 -2.018220  0.228110  2.04450 -2.36051
##        PC395     PC396     PC397     PC398     PC399    PC400     PC401
## 1  6.8458400  2.601220 -3.902850 -4.784300 -1.103490 -2.34627  5.813530
## 2  2.2776900  1.800140  1.737280  2.343510  2.579260 -3.18111 -0.250946
## 3  2.3516900 -4.627460  4.260240 -1.274330  2.103960  2.20815 -3.583430
## 4 -4.6978800 -3.526580  0.563934 -1.589780  4.390240  2.72954  2.931680
## 5 -0.0425012  2.741770  7.498050 -0.678743 -4.734170  3.42736 -3.508500
## 6  1.1488100 -0.793504  0.334757  0.321808 -0.719136  1.21465 -2.098070
##       PC402      PC403      PC404     PC405     PC406     PC407     PC408
## 1  2.614000  1.5340100  0.5677840  2.042300 -2.877800  5.413290 -0.460487
## 2 -3.791240  0.0993136 -0.7526180  3.235600 -2.885100  3.101630  2.555550
## 3 -2.865430  2.7200200  3.9167600 -0.601597 -0.959362  5.246560 -2.492140
## 4  0.802927 -0.1342930 -0.0590634 -0.477474 10.004000 -1.160220  0.965827
## 5  3.129410 -4.4339300  2.9766200  3.267150 -0.242847 -0.985181 -1.252730
## 6  1.534780  1.6311600 -2.2387000  0.189547 -0.935675  2.900690 -1.135740
##       PC409     PC410     PC411     PC412    PC413     PC414     PC415
## 1  5.777350  0.186193 -3.697420  1.053840  3.70827 -5.480940  1.928650
## 2  0.814351 -8.592350 -4.871200 -0.858377 -7.73860  1.948890 -2.902160
## 3 -0.948669  2.233200  0.256813  6.531920 -2.19023 -0.666166  6.522690
## 4  4.144810  6.588860 -1.085550 -1.382170 -1.37113 -0.703932  0.413409
## 5 -7.554420  8.089660  0.699263  7.572820 -2.09022 -0.919366  2.917860
## 6  3.467400  1.091660  0.038636 -2.278600  0.25542  2.264440 -0.663749
##       PC416     PC417     PC418     PC419     PC420      PC421     PC422
## 1 -0.960798  0.534835 -1.627320  7.447220  6.952380 -9.5273000 -6.048750
## 2  5.853670 -1.761720 -0.360365 -8.859380  3.452240  0.0801953 -2.015720
## 3 -2.638750  1.807090  2.227800  0.517441  1.582060 -1.3469500 -0.587567
## 4  7.248530 -4.305390 -3.785030 -1.855380 -4.860160 -1.4193400  4.766900
## 5  6.631460 -1.010210  1.917000  4.946750  2.139460  5.3060900  1.107250
## 6  0.218753 -0.605274  0.706693 -1.697190  0.854723 -2.0809100 -1.260280
##        PC423    PC424     PC425    PC426     PC427     PC428     PC429
## 1 -2.4164200 -4.19649 -4.862450 -2.59090 -0.718711 -2.003220  0.149758
## 2 -0.0309085  2.86232 -4.261170  7.32878 -4.533250  6.446920 -0.684153
## 3  4.0501300  4.85166  5.264650 -3.99502  1.338970 -0.254161  0.777067
## 4  0.1984860  1.55305 -4.039200 10.39760  1.798110 -3.909950 -4.878780
## 5  2.5580600 -4.82430  0.581627 -2.12373  7.219020  3.301050 -0.278043
## 6 -0.5154770  1.14752  3.015220 -2.58170 -1.167980  0.752221 -2.054580
##       PC430    PC431     PC432     PC433     PC434     PC435     PC436    PC437
## 1  6.903710 -3.23594 -1.899440  1.755130 -1.009700  1.032420  0.605694 -5.14891
## 2  0.111747  4.16981 -0.429687 -0.896585 -1.734790 -2.926040  5.170750 -2.07149
## 3  3.422970  2.07135  2.496960 -7.949870  3.549730 -3.927070  0.626145 -3.80836
## 4  0.155235 -3.79257 -2.710380  9.570040  6.689530  0.696102  2.520570  4.68329
## 5 -3.017840 -1.67729 -2.455800  4.004330 -3.256930  2.615680 -2.162880  9.54156
## 6 -2.734310  0.76756  0.593651  1.942550  0.971759  0.034676  1.244580  3.85916
##      PC438     PC439     PC440    PC441     PC442     PC443    PC444    PC445
## 1  5.47715 -0.333951 -0.710384 -0.87455 -1.069980 -1.108100 -2.04437  1.68954
## 2  3.52655 -4.197790 -5.272720 -2.01565  7.289110 -0.748792  2.66491 -7.08296
## 3 -2.28515  3.220360  2.532970 -0.24209  0.435932 -1.061440 -3.83419 -2.07934
## 4 -8.25464 -3.819230 -7.421120 -1.55453  1.797960  5.747530 -7.55522  1.54181
## 5 -6.22470  4.017720  4.745770 -5.10651 -2.776900 -1.659780 -2.71868 -1.31831
## 6 -1.78598  0.993170 -0.560315  1.57821 -2.271400  0.712352 -1.80377 -3.87990
##       PC446     PC447    PC448     PC449     PC450     PC451     PC452
## 1  1.609370  5.777400 -2.78657 -4.841920  6.575910  1.955260  3.291230
## 2 -3.878400  1.257500 -2.74408 -1.613860  1.263820 -5.885220  4.668970
## 3 -2.621180  2.010490  2.48831  3.940340 -5.675830 -3.944010 -0.403480
## 4 -0.298918  3.403740 -1.93151 -1.045810 -4.763210  1.184080  0.768264
## 5  2.381300  4.334380  2.00908  3.423350  1.921050  0.619247  1.741630
## 6  1.689620 -0.944392  2.00521  0.797348  0.386529 -2.287020 -2.655610
##       PC453     PC454     PC455     PC456      PC457      PC458     PC459
## 1  2.740960  2.007130 -7.856160 -3.908200 -14.401300 -2.2873200  0.458626
## 2 -2.301980  2.597060 -3.995680  6.332570   8.113390 -8.6148000 -3.382040
## 3 -4.533070  3.036120 -3.583590 -2.309730   1.026400  0.7367130  5.642500
## 4  0.761276  1.089780 -7.264330 -5.383180   0.543375  0.0216743  3.115990
## 5  3.224010 -6.451810 13.767100 -3.217300  -0.284549 -6.8200900 -1.865860
## 6 -0.037085  0.915636 -0.770887  0.343602   2.740980 -0.3263570  0.144887
##        PC460     PC461      PC462     PC463     PC464     PC465     PC466
## 1 -5.8307300  0.335195  0.8371280 -3.947950  9.456580  1.378480 -3.283470
## 2  3.0782400  5.823550 -1.5176700 -6.812190 -5.074650  0.193145 -2.545930
## 3 -5.6526700  2.398080  0.2947330 -0.635040  3.685810 10.201300 -1.725050
## 4 -1.2320100  3.640730  0.0720409  4.277130  0.140552 -0.347928 -1.591960
## 5 -1.1100300 -1.169190 -5.0132400 -0.817523 -4.903980 -1.229360 -5.018170
## 6  0.0445979  4.160530 -0.5825320  0.173514 -1.391120  3.110940 -0.340302
##       PC467      PC468    PC469    PC470     PC471     PC472     PC473
## 1 11.259500 -6.6065100 -2.41028 -5.17415  4.767700 -2.066910  1.625840
## 2 -3.045640 -0.5041950  3.04394  4.78015 -0.793591  1.275440 -5.215130
## 3  2.103520  6.2128000 -1.38180  1.59864 -4.149680 -0.985679  6.012580
## 4 -4.241820 -1.0168000 -5.00685 -4.47607  2.270440 -3.317540  3.226550
## 5 -0.539145  0.0830878 10.16960 -2.43238 -2.670970 -1.868420 -1.636530
## 6 -0.618192 -1.5144400  1.11818 -0.65934 -1.397300 -3.780960  0.571235
##       PC474     PC475      PC476     PC477      PC478      PC479      PC480
## 1 -3.716030 -0.904873 -3.5439600  1.192140 -0.0160798  0.9148390 -3.6783700
## 2  0.231343 -0.436475  4.7219900 -0.598404  3.6480200  8.5704700 -3.8117800
## 3  1.686770 -2.846930 -4.1807800  2.574220 -3.3559800 -3.0842300 -0.0782256
## 4  4.589050 -4.128190 -2.6962900  3.385050 -1.0951000 -2.2036300  2.3830800
## 5  1.155690 -5.976330  3.2007000 -1.321310  3.5089600  5.4931300  2.1447300
## 6  0.677919  1.605820 -0.0390345  3.205160  0.5444620 -0.0604682 -2.6974100
##        PC481     PC482     PC483     PC484     PC485     PC486    PC487
## 1  0.0416064 -2.455510 -3.726990  2.021170  3.216720 -0.738357 -6.83587
## 2 -3.1901300 -2.004330 -3.926620  1.385990 -2.052160  7.608510  1.93646
## 3  1.2533500  0.710538 -0.487847 -4.668710  4.968950  0.191199  3.39163
## 4 -1.5768300 -8.989680 -1.225960  2.243270 -1.436910 -2.304290  1.72469
## 5  0.9552680  0.871639  1.920940 -0.739996 -0.858372  0.974944 -2.97047
## 6 -0.4576980 -0.602847 -1.728920  2.415730  2.118060  0.207212  1.83863
##      PC488     PC489     PC490    PC491     PC492     PC493      PC494
## 1  3.88737  4.943570 -3.893890  9.65027 -2.760150 -2.728860 -4.5912600
## 2 -4.54673 -2.473430  4.058000 -7.58451  1.900550 -0.448330  3.0949600
## 3  2.86445 -2.070220  2.462650  2.51220  2.058280  5.339860  2.6515900
## 4  1.68421 -1.199610 -0.530965 -1.33119 -0.975906  3.893280 -0.0340843
## 5  3.82968 -1.900950  1.625210 -2.45836  5.390610 -3.771980 -2.1347900
## 6 -1.46312  0.554516  2.867900 -1.58279  0.983982 -0.891599  0.4064800
##       PC495     PC496      PC497      PC498     PC499     PC500    PC501
## 1  2.351490  7.163820 -8.2409200 -3.9120700 -9.062020  3.441160 -3.08368
## 2 -5.062370  0.855031 -2.7127200  2.3142900  1.586490 -1.507090  5.28443
## 3  5.551460 -2.132360 -1.4278500  5.2343000  3.873320  5.284640 -8.04346
## 4 10.054500 -5.523180  5.5164200  1.8302400  3.816660  0.187067 -6.74254
## 5 -0.230529 -0.137757 -7.7412500  0.0808266 -5.174790 -5.542820  1.94645
## 6  0.258723 -0.700783 -0.0400363 -0.6213620  0.955429  2.011610 -2.18172
##       PC502     PC503     PC504     PC505     PC506     PC507    PC508    PC509
## 1 -0.419969 -2.995380  5.961550  5.361510 -1.462250  1.691420  4.43112  1.01300
## 2  0.763600  3.303000 -2.168690 -1.453230  3.321350 -2.424390 -6.57481 -2.74568
## 3 -0.855195  0.200559  2.670440  0.968265  2.032350  3.530460  1.67844 -1.72968
## 4  2.425220 -1.481630 -2.864290  0.309452 -4.523930 -6.332410 -7.21716 -9.59828
## 5  6.997940  5.287440 -0.247115 -3.957960 -0.768855 -5.349030 -3.52164  2.70210
## 6 -1.222820  1.850020  3.368660  0.123468 -1.643740 -0.116247  1.02118  1.80872
##      PC510     PC511     PC512     PC513     PC514     PC515     PC516
## 1  1.14270 -0.593595  4.568530 -0.783411  6.798120  3.465030 -1.386550
## 2 -5.43281 -6.073480 -9.238620 -3.930640  4.714750  3.786550  5.979870
## 3  1.92742  1.038900 -0.758543  3.194210 -1.424260  3.861540 -1.934080
## 4 -2.61054  3.113330  4.754330 10.433200 -0.938057 -0.495519 -3.047320
## 5  5.49661  0.799706 -3.859220 -1.651530 -2.488160 -1.019560 -0.313847
## 6  4.25996  0.588597  0.363282  0.333997 -1.110800  1.084420 -0.047089
##        PC517     PC518     PC519     PC520     PC521     PC522     PC523
## 1  0.8358150 -0.876649  0.585292 -3.637650  1.655300  0.501426  3.003400
## 2  1.7188800 -1.044720 -4.890080  8.956480 -4.874770  6.440240 -7.659130
## 3 -1.9131200 -1.373370  6.798190  1.718600  7.593670 -1.421980 -0.778801
## 4 -0.8310570  1.023570 -1.006960 -3.737210  1.595190 -0.422374 -0.502342
## 5  0.7935220 -2.730800  0.691550 -4.445310 -1.428650 -1.608260  0.645048
## 6 -0.0785162 -1.464450 -1.750720 -0.275537  0.461668  0.281456 -0.719800
##       PC524     PC525      PC526     PC527      PC528     PC529     PC530
## 1  0.142158 -0.266102  1.5461200  2.902880 -3.2089000 -0.848327 -3.410050
## 2  0.839364  5.432040  2.0222800 -3.053660 -3.1449600 -1.215720  6.297870
## 3 -0.569315 -3.984880  0.0628193 -4.170970  0.8654790  1.617450 -3.183280
## 4 -2.952250 -1.144010 -0.6536370 -3.288340  8.6818500 -6.509320 -3.589260
## 5 -1.499840 -0.112070 -2.4087800  1.257200 -4.0401000  0.260552  0.485898
## 6 -2.144600  0.108141 -0.6499950 -0.186286  0.0153899  0.111869  1.455910
##       PC531    PC532     PC533     PC534     PC535      PC536     PC537
## 1 -3.781640  1.16091 -0.199297  2.688420 -3.921750  0.9269860 -6.306970
## 2 -1.578350  1.83128 -1.105080 -3.940920  2.397470 -2.7789500  6.376910
## 3  7.645230 -2.08849  5.024420 -3.078700  7.033480 -2.6912400  4.013870
## 4  0.709288  1.40429  0.118824 -0.606427  0.373311  2.8674300  1.767860
## 5  2.082530 -1.38143  2.558050 -0.135362  1.782990 -2.0086800 -2.512510
## 6 -0.668018 -1.36199 -1.563150  0.325076 -0.269977  0.0203826 -0.178135
##        PC538     PC539     PC540     PC541     PC542    PC543     PC544
## 1 -8.0671200 -1.708610  1.948850  2.694340 -1.408230  2.19445  1.809490
## 2  0.9573450  5.234290 -6.633550 -4.090920  2.165270  1.91719 -5.977660
## 3  7.2265100 -0.134621  5.578220 -2.762610  4.422340 -2.61493  3.222350
## 4  2.9975300 -2.940770 -1.263460  3.066620 -4.970990 -7.31893  1.014820
## 5 -0.2653890 -2.483520  0.549056 -1.825210  0.372149 -1.27103  0.922858
## 6  0.0719487  1.748010  1.272710 -0.845579  0.985204  1.41856 -0.889595
##      PC545     PC546    PC547     PC548     PC549     PC550     PC551     PC552
## 1  4.40683 -5.409050  5.96768  0.398777 -0.702313 -1.357150 -2.232910  2.326290
## 2 -1.43173  6.598210 -1.12831 -3.808900  0.802527 -3.136790 -1.408970 -1.345590
## 3 -3.86566 -0.439519 -4.08789  3.048990 -5.735550 -2.143700  0.969942 -3.183430
## 4 -4.05053 -0.149712 -2.68935 -4.168250 -2.612520 -0.710597 -0.938771  3.164760
## 5 -4.88518  3.106530 -4.32789 -1.546010 -2.443470 -1.167400  5.067440 -2.298670
## 6  2.84320 -0.582349 -2.30697 -1.589510  0.670184 -1.329660  0.221059  0.586166
##      PC553      PC554     PC555     PC556     PC557     PC558     PC559
## 1  0.79319  0.0470201  1.981870 -1.703330 -2.061230  1.562570  0.825182
## 2 -3.32840  2.2720900 -0.506293 -0.436124  2.560870 -1.030220 -1.628560
## 3 -7.12748 -9.1333200  1.353370 11.436600  5.192520 -1.752540  8.428580
## 4 -4.10860 -1.9656600  0.557320 -1.547850 -0.705234 -0.601586 -4.642500
## 5  1.67823 -2.2403200 -0.481799  2.929270  2.811560 -2.738110  4.174570
## 6 -1.75585 -0.2725420  0.552749  0.570284  0.514863 -0.704511 -1.643480
##       PC560     PC561     PC562      PC563     PC564     PC565     PC566
## 1  0.420481 -3.793130 -0.414296 -0.0600313  5.563500  2.361350 -0.262055
## 2  2.953380  4.280560  0.601785 -2.9315400 -2.021430 -7.079140  4.836760
## 3 -3.234820  1.132630  0.694219  3.9883600 -5.318680  4.967230 -6.210310
## 4 -0.712875 -0.518147  1.870550 -0.9059940 -6.691840  2.143030  5.938940
## 5  0.791369  0.524604 -0.367272  4.1172100  0.705802 -1.575030 -4.433060
## 6  0.792963  0.647974  0.339495  0.4210210  0.127645  0.483052  1.349320
##         PC567      PC568     PC569      PC570     PC571    PC572     PC573
## 1  0.29151600  0.7719060  0.306834 -1.0000500  3.047820  3.18003  2.843420
## 2  0.59613900 -3.1383900 -4.055770 -0.0871318 -3.504170 -1.41691 -2.354610
## 3  2.98454000  0.0442454  4.062920 -0.0503942 -3.219580 -1.66863  2.486230
## 4 -0.00271435 -0.2810280  4.301680 -0.4060630  1.402390 -1.75967 -1.530670
## 5  0.74239100 -1.5467500 -3.770110 -2.8879800 -1.106600  1.11875  0.442358
## 6 -1.60319000 -1.8204800  0.887334  0.3000610 -0.726671 -1.73700 -0.634761
##        PC574     PC575     PC576     PC577      PC578     PC579     PC580
## 1 -0.3358360 -1.237310  0.665701 -0.605718  0.0551401 -1.007290  1.793220
## 2 -2.9567100  1.861450 -3.139170 -3.050840 -0.8580520  3.980700 -3.359080
## 3  2.6076500  1.443250  0.584546  4.961950  1.2243800 -4.983430 -0.230114
## 4  5.7013300 -1.543000  0.630317  1.868920  1.1734300 -1.295220 -0.954495
## 5 -0.0129894  0.115781  0.323865  0.380932 -0.8205720  0.449513  0.579594
## 6  0.6042500  1.319940 -0.597215  1.652550 -0.4444370 -0.331132 -0.569123
##       PC581      PC582      PC583     PC584     PC585     PC586     PC587
## 1  2.720270 -0.4616770 -2.3320300 -1.475010 -1.899580 -2.866020 -1.932290
## 2 -1.834220 -0.0690979  3.1247700  4.688780 -0.123124 -1.236990 -2.422160
## 3  3.156670  0.2686570  0.7222070 -6.301350  0.217357  1.439610 10.159800
## 4  0.814130 -3.2691900 -0.0419468 -1.381850 -0.676409 -0.266177  0.927464
## 5 -2.802740 -1.7489800  2.9339800 -0.163633 -1.765400  1.683580  1.384650
## 6 -0.466528  1.2506900 -0.4928560 -0.626011 -0.526173  0.292149  0.573847
##       PC588     PC589      PC590     PC591      PC592     PC593     PC594
## 1 -0.141236 -0.664032 -0.8161300 -0.207564  1.4935100  0.316396 -0.667961
## 2  2.604660  2.575450 -1.5535600 -0.500613  0.0649269 -0.277733 -0.694854
## 3 -1.298170 -3.526050  4.8350800  1.476620 -4.5997500  3.266830  4.748050
## 4 -0.943995 -2.347760  3.7155600  1.677190 -2.1261300 -1.451200 -0.320646
## 5 -2.474000 -1.555780  0.0218849  0.402409 -0.2095010  0.888746  0.921578
## 6 -0.722939 -0.379269  0.2168900  2.065300 -0.7622940 -1.045700  2.085040
##       PC595     PC596     PC597     PC598     PC599    PC600     PC601
## 1 -0.549683 -0.246980 -1.251890  0.414057  1.418840 -2.87165 -0.469797
## 2  0.775012 -0.540705 -1.738300 -0.582886 -0.645756 -1.21956 -0.324708
## 3 -1.797690  0.313111  3.205810 -2.811320  0.922040  1.41111  1.903940
## 4 -0.216478  1.009920 -0.662947  2.115990  0.858634  1.88245 -0.748201
## 5 -2.515350  0.499773  1.423690  1.116750  1.089660 -1.82501  2.321740
## 6 -0.131602 -0.199249  1.387420 -1.301500 -1.145690 -1.21221 -0.328161
##        PC602     PC603     PC604    PC605      PC606     PC607      PC608
## 1  0.2907260  1.473110 -0.336378 0.100602 -0.6851090 -0.781235 -0.3115810
## 2 -0.4071800 -4.197880 -1.868530 1.069350 -2.8481800  0.505759 -0.0393097
## 3 -1.9962200  3.364780  1.648440 2.445660  2.8466400  2.121700 -0.0818179
## 4  0.0959698 -0.898860  1.091910 0.178327  0.0165746 -0.598874  0.4995670
## 5 -0.8577700  0.662205  1.237620 0.781759  0.4888930  1.386450  0.0132441
## 6  0.2848460  0.041936 -0.780476 0.672115 -0.7908480 -1.269890  1.0314300
##       PC609     PC610     PC611     PC612     PC613     PC614     PC615
## 1 -0.221057  0.442800  0.521209 -2.357190 -0.850230  0.591296 -1.296590
## 2  0.200218 -2.238000 -0.872753  1.246050  0.438104 -0.094276  0.505613
## 3 -2.850980  0.723151 -0.587935 -0.245363  0.224606 -1.587410 -3.535470
## 4  1.108090 -0.402738 -1.670960  1.746220  4.937510 -1.617230 -1.059860
## 5 -0.206420 -0.885669  2.159730 -0.637576  0.935158 -0.578438 -1.365530
## 6 -0.565625  0.069130  0.532827 -0.571052  0.621579  0.243557  1.545660
##       PC616      PC617     PC618     PC619     PC620     PC621      PC622
## 1 -1.632250  0.6660840  0.389295  0.276354 -0.774150  0.953437  0.0521014
## 2  0.138709 -0.5827020 -1.151380 -0.435772 -1.163980 -0.306299  0.3336960
## 3  2.235280 -1.4992200  0.896373  0.308423  0.440106 -1.725590  1.5645700
## 4  0.438829 -0.0687455  2.197370 -0.360688 -1.277530  0.288341  0.5624200
## 5 -0.236214  0.8144490 -0.483606 -0.289504 -0.841674  0.392056 -0.8320790
## 6  0.818821 -0.6228910 -0.699817 -1.189440 -0.185710  0.129525  0.1670970
##        PC623       PC624      PC625     PC626      PC627     PC628     PC629
## 1  0.5175900 -1.81857000 -0.7316110 -0.823505  0.1560310  0.192283  0.418723
## 2 -0.4767880  1.58266000 -1.3272400  1.407600 -0.6784180 -0.135068 -0.591682
## 3  0.0563922 -0.17899200  0.4434480  0.630557  1.7496100  0.201506 -0.740755
## 4 -0.4156750  0.63909300  0.8555740 -1.431060 -0.5154220 -0.145055  0.434376
## 5  0.0536224  0.00415705  0.8372700 -0.243549  0.7443450  1.271420 -0.486573
## 6  0.3216150 -1.03834000 -0.0593882 -1.546240  0.0601514 -0.141470  1.874040
##       PC630     PC631      PC632      PC633     PC634     PC635      PC636
## 1 -0.945399 -0.116795  0.3469450 -0.7861110 -0.533855  0.509936 -1.6642200
## 2  0.205106  0.423638 -1.1551400 -0.4015300  1.020260 -0.627157  1.8214800
## 3  0.670900  0.164132 -0.1241230  1.8740000  0.104715  0.359540 -0.8932950
## 4  1.444600  0.445946 -0.0578001  0.0259392  1.181570 -0.179463 -0.0379748
## 5 -1.395740 -0.927931 -0.6534370  0.9361040  0.608401  0.398321  1.0212500
## 6  0.359775 -1.074600 -0.2059990  0.2622780 -0.164927 -0.100484 -0.1302880
##       PC637      PC638      PC639      PC640      PC641     PC642     PC643
## 1  0.162724 -0.4605770  0.4752010  0.6770570 -0.4625220 -0.834200  0.201035
## 2 -0.415161  0.4964910 -0.4146870 -0.0406003  0.6603250  0.593328 -0.184156
## 3 -0.326878  0.6808590  0.0577737  0.0221834  0.5410200  0.128853 -0.604277
## 4 -0.315693 -1.0257500 -0.5162990  0.6430490  2.2318600  1.095330  1.038110
## 5 -0.495483 -0.0768325  0.2054040 -0.0461064 -0.2768540 -0.169853  0.132705
## 6  1.498620 -2.0069700  2.5834200 -0.0282036  0.0243375  0.482614  0.949408
##        PC644    PC645     PC646     PC647      PC648      PC649      PC650
## 1 -0.5574490 1.036380 -1.150420 -0.353489  0.4766350 -0.7206810 -0.5252320
## 2  0.5225750 0.361486  1.335850  0.248652 -0.0931085  0.8748700 -0.5641140
## 3  0.4590010 0.377079 -0.155535 -0.813350 -0.7363170  0.4244160  1.1097400
## 4  1.6384200 0.306756  1.243450 -0.767603  0.0657744  0.0890435  0.2621140
## 5 -0.0914471 0.523687  0.102309  0.285675 -0.6218090 -0.1905420 -1.0366500
## 6 -3.3399100 1.651420 -2.192890  2.853810 -0.7432670 -0.0134943 -0.0703362
##       PC651     PC652     PC653     PC654     PC655      PC656      PC657
## 1  1.181390  0.411984 -0.491513  0.636168 -1.108530 -0.7914510  0.5128880
## 2 -0.306752 -1.094940 -0.974501 -0.224273  0.374006 -0.1289310 -0.4801830
## 3  0.200795  0.768692  0.203815 -0.514048  1.624800  0.0332113  0.2230850
## 4 -1.070100  0.238257 -0.242319 -0.823498  0.255163  0.2897510  0.0515537
## 5  0.855812  0.103177 -0.416720 -0.929673  0.352428  0.3306270 -1.0749700
## 6 -0.611530  2.776650 -0.394573  4.397120  0.926366 -1.7102700  2.9783800
##        PC658     PC659      PC660      PC661      PC662      PC663      PC664
## 1 -0.2485930  0.479566  0.6480230 -0.6219510  0.2869750 -0.3355290  0.1855000
## 2 -0.8273070 -0.623966 -0.5404300 -0.6169230  0.3506670 -0.0202628 -1.6292600
## 3  0.1220640 -0.021907  0.8216040  0.1395790 -0.3960220  0.1827820  0.4200820
## 4  0.0235954 -0.322674 -0.0527559  0.1230140 -0.2317690 -0.3287740  0.6901430
## 5  0.5372470 -0.578310 -0.0338003  0.0413231  0.0962499  0.2256080  0.0386441
## 6 -1.1051900 -1.850610  2.0511700  5.6060400 -7.9374700 -2.0061800  3.2096900
##        PC665     PC666     PC667      PC668      PC669      PC670     PC671
## 1  -0.754257  0.554987  0.158071  0.7458300  -0.379930  0.1047520 -0.179097
## 2   0.228498 -0.399271 -0.591397  0.4634680   1.476680 -0.0455636 -0.303098
## 3   0.531042 -0.305498 -0.350900 -0.0910953   0.638506  0.0446885  0.178340
## 4  -0.612182  0.126106  0.187181  0.0336248  -0.660484 -0.4541910 -0.345722
## 5   0.163223  0.390714  0.108911  0.3355710   0.333600 -0.0537582  0.395603
## 6 -10.648800 -6.623200  9.017770 -3.8143900 -23.000500  2.8040000  5.838030
##        PC672      PC673     PC674     PC675      PC676      PC677      PC678
## 1 -0.3342810  0.3981560 -0.435260  0.175920 -0.0468280  0.1285820  0.0498232
## 2  0.3728800 -0.4325030  0.390417  0.350570 -0.0780383  0.3937650  0.6010360
## 3 -0.0501735 -0.9105390 -0.330144  0.204279 -0.7197590 -0.9475590  0.3314220
## 4 -0.4610860  0.0223074 -0.690654 -0.574589 -0.4404890  0.0831125 -0.2581000
## 5  0.3669710 -0.6234950  0.871031  0.295893  0.6582310  0.6858390 -0.0185350
## 6 -3.1218000 -0.7085280 -2.893680  0.841936  2.9148100  5.8779700 -2.9231400
##       PC679      PC680      PC681      PC682      PC683      PC684      PC685
## 1 -0.209140 -0.0670167  0.0682520  0.1681010  0.4406970  0.0738443 -0.2731980
## 2 -0.187945  0.0337131 -0.1581240 -0.1310550  0.0736109 -0.0511851 -0.0689223
## 3  0.174690  0.3413370 -0.4316240 -0.3709540 -0.0836069 -0.1589380  0.2315860
## 4 -0.053170  0.1103960 -0.0865292 -0.0130275 -0.1663030  0.0509096  0.0573638
## 5  0.471279 -0.4053920 -0.3836460  0.2063080 -0.2012220 -0.1072520 -0.1028470
## 6 -0.844235  0.2050230  0.1600970  0.8233040 -0.5068480 -0.5304620  0.6896650
##        PC686      PC687        PC688 Individual Pop_City Country Latitude
## 1  0.1659210 -0.2032410 -5.23915e-07        801   Durres Albania 41.29704
## 2 -0.2389500  0.0493433 -5.23915e-07        802   Durres Albania 41.29704
## 3  0.2278310  0.3125050 -5.23915e-07        803   Durres Albania 41.29704
## 4 -0.2272640 -0.0666839 -5.23915e-07        804   Durres Albania 41.29704
## 5  0.0771716 -0.0817589 -5.23915e-07        805   Durres Albania 41.29704
## 6  0.4703270 -0.0361765 -5.23915e-07        806   Durres Albania 41.29704
##   Longitude Continent Year          Region   Subregion order order2 orderold
## 1  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 2  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 3  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 4  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 5  19.50373    Europe 2018 Southern Europe East Europe    33     25       25
## 6  19.50373    Europe 2018 Southern Europe East Europe    33     25       25

5.1.2 Create PCA plot

#save the pca plot
ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 and PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

Colored by region

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_euro_global_region_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 and PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_euro_global_region_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

5.1.3 Run LEA for MAF 1% and r2<0.01

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/snps_sets/r2_0.01_b.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1","lea_cross_entropy_euro_global_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max  0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max  0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max  0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
##         K = 22    K = 23    K = 24    K = 25
## min  0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max  0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 20
ce = cross.entropy(project, K = 20)
ce #run 1 is best for k=20
##          K = 20
## run 1 0.8675384
## run 2 0.8685108
## run 3 0.8681354
## run 4 0.8683496
## run 5 0.8683367
summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max  0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max  0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max  0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
##         K = 22    K = 23    K = 24    K = 25
## min  0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max  0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 12
ce = cross.entropy(project, K = 12)
ce #run 5 is best for k=12
##          K = 12
## run 1 0.8711113
## run 2 0.8716798
## run 3 0.8718053
## run 4 0.8724610
## run 5 0.8709529
summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max  0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max  0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max  0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
##         K = 22    K = 23    K = 24    K = 25
## min  0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max  0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 15
ce = cross.entropy(project, K = 15)
ce #run 5 is best for k=15
##          K = 15
## run 1 0.8695671
## run 2 0.8698192
## run 3 0.8701692
## run 4 0.8698683
## run 5 0.8691706
summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max  0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max  0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max  0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
##         K = 22    K = 23    K = 24    K = 25
## min  0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max  0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 6
ce = cross.entropy(project, K = 6)
ce #run 5 is best for k=6
##           K = 6
## run 1 0.8811239
## run 2 0.8814691
## run 3 0.8818163
## run 4 0.8813863
## run 5 0.8810623
summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max  0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max  0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max  0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
##         K = 22    K = 23    K = 24    K = 25
## min  0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max  0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 22
ce = cross.entropy(project, K = 22)
ce #run 5 is best for k=22
##          K = 22
## run 1 0.8688708
## run 2 0.8675688
## run 3 0.8681413
## run 4 0.8680349
## run 5 0.8669903

5.2 Plots for LEA SNP Set 3 (r2<0.01, MAF>1%)

5.2.1 Plots for K=20

color_palette_20 <-
  c(
    "#F49AC2",
    "navy", 
    "yellow2",
    "purple",      
    "#B20CC9",
    "magenta",   
    "#008080", 
    "green",
    "#77DD77",        
    "#AE9393",
    "#B22222",
    "#AE8333",
    "#FF8C1A",
    "#1E90FF",
    "orangered",
    "green4",
    "purple4",
    "#75FAFF",
    "blue",
    "#FFFF99"
  )

5.2.1.1 Mean admixture by country for K=20

using ggplot

best = which.min(cross.entropy(project, K = 20))

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 20, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:20)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_20)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "LEA_admixture_by_country_euro_global_k20_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

5.2.1.2 Plot individual admixtures for k=20

Extract ancestry coefficients for k=20 change to correct matrix

leak20 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K20/run1/r2_0.01_b_r1.20.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak20)
## # A tibble: 6 × 20
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.0264    0.0207    0.0131     1.31e-2 1.90e-2 3.48e-2 2.08e-3 4.19e-3 1.34e-1
## 2 0.0000999 0.0657    0.0185     9.99e-5 9.99e-5 5.16e-2 9.99e-5 6.28e-2 1.70e-1
## 3 0.0000999 0.0000999 0.0000999  9.99e-5 9.99e-5 2.84e-3 9.99e-5 9.99e-5 9.99e-5
## 4 0.0000999 0.0323    0.0000999  8.44e-3 9.99e-5 1.29e-2 1.39e-2 5.77e-3 1.90e-2
## 5 0.0000999 0.00361   0.0146     9.99e-5 1.80e-3 4.09e-3 2.93e-2 9.99e-5 9.99e-5
## 6 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 11 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak20 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak20)

head(leak20)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 2.64096e-02 2.06646e-02 1.31437e-02 1.30614e-02 1.90113e-02
## 2 1002 OKI 9.99460e-05 6.56597e-02 1.84685e-02 9.99460e-05 9.99460e-05
## 3 1003 OKI 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 4 1004 OKI 9.99190e-05 3.23457e-02 9.99190e-05 8.43696e-03 9.99190e-05
## 5 1005 OKI 9.99190e-05 3.61125e-03 1.46469e-02 9.99190e-05 1.79802e-03
## 6 1006 OKI 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
##            X6          X7          X8          X9         X10         X11
## 1 3.48243e-02 2.08108e-03 4.18512e-03 1.34475e-01 9.99820e-05 2.13904e-02
## 2 5.15924e-02 9.99460e-05 6.28295e-02 1.70231e-01 1.02422e-01 2.38672e-02
## 3 2.84154e-03 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 7.08506e-03
## 4 1.28981e-02 1.38764e-02 5.76749e-03 1.89892e-02 9.99190e-05 9.99190e-05
## 5 4.08688e-03 2.93200e-02 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
##           X12         X13         X14         X15         X16      X17
## 1 9.99820e-05 5.83628e-02 2.90473e-03 1.98357e-02 6.83355e-03 0.459098
## 2 9.99460e-05 7.69412e-03 2.59487e-02 4.60633e-02 9.99460e-05 0.335353
## 3 9.98740e-05 9.98740e-05 2.42597e-04 9.98740e-05 9.98740e-05 0.981889
## 4 2.32811e-02 1.26668e-02 9.99190e-05 3.02842e-02 9.99190e-05 0.834116
## 5 2.96501e-03 9.99190e-05 9.99190e-05 2.97077e-02 9.99190e-05 0.872147
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 0.998103
##           X18         X19         X20
## 1 8.79653e-02 5.75547e-02 1.79983e-02
## 2 2.66581e-02 6.06222e-02 1.99036e-03
## 3 5.65165e-03 9.98740e-05 8.91697e-04
## 4 9.99190e-05 6.43857e-03 9.99190e-05
## 5 1.46587e-02 1.05423e-02 1.56174e-02
## 6 9.98288e-05 9.98288e-05 9.98288e-05

Rename the columns

# Rename the columns starting from the third one
leak20 <- leak20 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak20)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 2.64096e-02 2.06646e-02 1.31437e-02 1.30614e-02 1.90113e-02
## 2 1002 OKI 9.99460e-05 6.56597e-02 1.84685e-02 9.99460e-05 9.99460e-05
## 3 1003 OKI 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 4 1004 OKI 9.99190e-05 3.23457e-02 9.99190e-05 8.43696e-03 9.99190e-05
## 5 1005 OKI 9.99190e-05 3.61125e-03 1.46469e-02 9.99190e-05 1.79802e-03
## 6 1006 OKI 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
##            v6          v7          v8          v9         v10         v11
## 1 3.48243e-02 2.08108e-03 4.18512e-03 1.34475e-01 9.99820e-05 2.13904e-02
## 2 5.15924e-02 9.99460e-05 6.28295e-02 1.70231e-01 1.02422e-01 2.38672e-02
## 3 2.84154e-03 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 7.08506e-03
## 4 1.28981e-02 1.38764e-02 5.76749e-03 1.89892e-02 9.99190e-05 9.99190e-05
## 5 4.08688e-03 2.93200e-02 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
##           v12         v13         v14         v15         v16      v17
## 1 9.99820e-05 5.83628e-02 2.90473e-03 1.98357e-02 6.83355e-03 0.459098
## 2 9.99460e-05 7.69412e-03 2.59487e-02 4.60633e-02 9.99460e-05 0.335353
## 3 9.98740e-05 9.98740e-05 2.42597e-04 9.98740e-05 9.98740e-05 0.981889
## 4 2.32811e-02 1.26668e-02 9.99190e-05 3.02842e-02 9.99190e-05 0.834116
## 5 2.96501e-03 9.99190e-05 9.99190e-05 2.97077e-02 9.99190e-05 0.872147
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 0.998103
##           v18         v19         v20
## 1 8.79653e-02 5.75547e-02 1.79983e-02
## 2 2.66581e-02 6.06222e-02 1.99036e-03
## 3 5.65165e-03 9.98740e-05 8.91697e-04
## 4 9.99190e-05 6.43857e-03 9.99190e-05
## 5 1.46587e-02 1.05423e-02 1.56174e-02
## 6 9.98288e-05 9.98288e-05 9.98288e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
color_palette_20 <-
  c(
    "#008080",   
    "magenta",   
    "#FF8C1A",
    "green4",
    "#B22222",    
    "#F49AC2",
    "green",
    "blue",
    "#AE9393",
    "yellow2",   
    "purple4",
    "#75FAFF",
    "#77DD77",  
    "#1E90FF",
    "#B20CC9",
    "#FFFF99",
    "goldenrod",
    "navy", 
    "orangered",
    "purple"
  )
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak20 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:20)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_20[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=20.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_20) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=20_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.2 Plots for K=12

5.2.2.1 Mean admixtures by country for K=12

color_palette_12 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#1E90FF",
    "#B22222",
    "#F49AC2",
    "blue",
    "#75FAFF",
    "magenta",
    "green4",
    "green",
    "purple",
    "orangered"
  )

Mean admixture by country using ggplot

best = which.min(cross.entropy(project, K = 12)) #5

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 12, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:12)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_12[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_12)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "LEA_admixture_by_country_euro_global_k12_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

5.2.2.2 Plot individual admixtures for K=12

Extract ancestry coefficients for k=12

change to correct matrix

leak12 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K12/run5/r2_0.01_b_r5.12.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak12)
## # A tibble: 6 × 12
##         X1      X2      X3      X4    X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  4.23e-2 9.77e-2 1.00e-4 4.21e-2 0.590 6.21e-2 2.51e-2 2.33e-2 2.21e-2 1.06e-2
## 2  3.17e-2 9.75e-2 1.31e-2 3.03e-2 0.479 8.86e-2 9.17e-2 1.00e-4 2.79e-2 5.20e-2
## 3  5.68e-3 1.17e-3 9.99e-5 9.99e-5 0.986 9.99e-5 9.99e-5 9.99e-5 5.94e-3 9.99e-5
## 4  1.80e-3 7.41e-3 3.62e-2 1.00e-4 0.827 4.48e-2 3.40e-2 1.00e-4 1.00e-4 4.86e-2
## 5  9.99e-5 9.99e-5 2.49e-2 1.37e-2 0.878 2.00e-2 2.47e-2 9.99e-5 9.99e-5 3.83e-2
## 6  9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 2 more variables: X11 <dbl>, X12 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak12 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak12)

head(leak12)
##    ind pop          X1          X2          X3          X4       X5          X6
## 1 1001 OKI 4.23137e-02 9.77107e-02 9.99910e-05 4.20743e-02 0.589946 6.21370e-02
## 2 1002 OKI 3.16848e-02 9.74647e-02 1.30881e-02 3.03256e-02 0.478817 8.86259e-02
## 3 1003 OKI 5.67710e-03 1.16962e-03 9.99280e-05 9.99280e-05 0.986417 9.99280e-05
## 4 1004 OKI 1.79768e-03 7.41490e-03 3.62091e-02 9.99550e-05 0.826658 4.47996e-02
## 5 1005 OKI 9.99460e-05 9.99460e-05 2.48609e-02 1.37150e-02 0.877755 2.00195e-02
## 6 1006 OKI 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 0.998901 9.99009e-05
##            X7          X8          X9         X10         X11         X12
## 1 2.50839e-02 2.32812e-02 2.20865e-02 1.05599e-02 2.55935e-02 5.91129e-02
## 2 9.16646e-02 9.99910e-05 2.78675e-02 5.20009e-02 3.17949e-02 5.65665e-02
## 3 9.99280e-05 9.99280e-05 5.93690e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 4 3.39939e-02 9.99550e-05 9.99550e-05 4.86267e-02 9.99550e-05 9.99550e-05
## 5 2.47145e-02 9.99460e-05 9.99460e-05 3.83352e-02 9.99460e-05 9.99460e-05
## 6 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05

Rename the columns

# Rename the columns starting from the third one
leak12 <- leak12 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak12)
##    ind pop          v1          v2          v3          v4       v5          v6
## 1 1001 OKI 4.23137e-02 9.77107e-02 9.99910e-05 4.20743e-02 0.589946 6.21370e-02
## 2 1002 OKI 3.16848e-02 9.74647e-02 1.30881e-02 3.03256e-02 0.478817 8.86259e-02
## 3 1003 OKI 5.67710e-03 1.16962e-03 9.99280e-05 9.99280e-05 0.986417 9.99280e-05
## 4 1004 OKI 1.79768e-03 7.41490e-03 3.62091e-02 9.99550e-05 0.826658 4.47996e-02
## 5 1005 OKI 9.99460e-05 9.99460e-05 2.48609e-02 1.37150e-02 0.877755 2.00195e-02
## 6 1006 OKI 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 0.998901 9.99009e-05
##            v7          v8          v9         v10         v11         v12
## 1 2.50839e-02 2.32812e-02 2.20865e-02 1.05599e-02 2.55935e-02 5.91129e-02
## 2 9.16646e-02 9.99910e-05 2.78675e-02 5.20009e-02 3.17949e-02 5.65665e-02
## 3 9.99280e-05 9.99280e-05 5.93690e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 4 3.39939e-02 9.99550e-05 9.99550e-05 4.86267e-02 9.99550e-05 9.99550e-05
## 5 2.47145e-02 9.99460e-05 9.99460e-05 3.83352e-02 9.99460e-05 9.99460e-05
## 6 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak12 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:12)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_12[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=12.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_12) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=12_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.3 Plots for K=15

color_palette_15 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#1E90FF",
    "#B22222",
    "#AE8333",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "purple4",
    "#75FAFF",
    "magenta",
    "green4",
    "green",
    "purple",
    "orangered"
  )

5.2.3.1 Mean admixture by country for K=15

using ggplot

best = which.min(cross.entropy(project, K = 15)) #5

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 15, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:15)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_15[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=15.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_15)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "LEA_admixture_by_country_euro_global_k15_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

5.2.3.2 Plot individual admixtures for k=15

Extract ancestry coefficients for k=15

change to correct matrix

leak15 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K15/run5/r2_0.01_b_r5.15.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak15)
## # A tibble: 6 × 15
##         X1      X2      X3      X4      X5      X6    X7      X8      X9     X10
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>
## 1  1.11e-1 2.61e-2 9.82e-2 3.86e-2 8.51e-3 1.07e-2 0.557 3.97e-2 9.31e-3 1.00e-4
## 2  1.06e-1 1.35e-2 2.39e-2 4.93e-2 1.00e-4 5.54e-2 0.466 1.62e-2 9.45e-2 4.65e-2
## 3  9.99e-5 9.99e-5 9.99e-5 9.99e-5 6.81e-3 9.99e-5 0.969 9.99e-5 1.16e-2 9.99e-5
## 4  1.72e-2 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.842 9.99e-5 9.99e-5 2.37e-2
## 5  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 1.04e-2 0.888 2.61e-2 9.99e-5 7.55e-3
## 6  9.99e-5 9.99e-5 9.99e-5 9.99e-5 4.27e-3 9.99e-5 0.994 9.99e-5 9.99e-5 9.99e-5
## # ℹ 5 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak15 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak15)

head(leak15)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 0.111112000 0.026075100 0.098216100 0.038634700 0.008512500
## 2 1002 OKI 0.105582000 0.013530700 0.023863800 0.049315600 0.000099982
## 3 1003 OKI 0.000099910 0.000099910 0.000099910 0.000099910 0.006810880
## 4 1004 OKI 0.017170000 0.000099919 0.000099919 0.000099919 0.000099919
## 5 1005 OKI 0.000099928 0.000099928 0.000099928 0.000099928 0.000099928
## 6 1006 OKI 0.000099883 0.000099883 0.000099883 0.000099883 0.004273470
##            X6       X7          X8          X9         X10         X11
## 1 0.010740000 0.556509 0.039697900 0.009314820 0.000099982 0.001370110
## 2 0.055394000 0.465974 0.016217400 0.094522000 0.046479000 0.006631220
## 3 0.000099910 0.969329 0.000099910 0.011603400 0.000099910 0.000099910
## 4 0.000099919 0.842212 0.000099919 0.000099919 0.023659600 0.030946600
## 5 0.010440600 0.887931 0.026139900 0.000099928 0.007549700 0.010461000
## 6 0.000099883 0.994428 0.000099883 0.000099883 0.000099883 0.000099883
##           X12         X13         X14         X15
## 1 0.042326300 0.034872400 0.000099982 0.022419200
## 2 0.012075400 0.016319500 0.000099982 0.093895300
## 3 0.000909957 0.010347400 0.000099910 0.000099910
## 4 0.000099919 0.000099919 0.077234200 0.007877870
## 5 0.000099928 0.006599350 0.050079000 0.000099928
## 6 0.000099883 0.000099883 0.000099883 0.000099883

Rename the columns

# Rename the columns starting from the third one
leak15 <- leak15 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak15)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 0.111112000 0.026075100 0.098216100 0.038634700 0.008512500
## 2 1002 OKI 0.105582000 0.013530700 0.023863800 0.049315600 0.000099982
## 3 1003 OKI 0.000099910 0.000099910 0.000099910 0.000099910 0.006810880
## 4 1004 OKI 0.017170000 0.000099919 0.000099919 0.000099919 0.000099919
## 5 1005 OKI 0.000099928 0.000099928 0.000099928 0.000099928 0.000099928
## 6 1006 OKI 0.000099883 0.000099883 0.000099883 0.000099883 0.004273470
##            v6       v7          v8          v9         v10         v11
## 1 0.010740000 0.556509 0.039697900 0.009314820 0.000099982 0.001370110
## 2 0.055394000 0.465974 0.016217400 0.094522000 0.046479000 0.006631220
## 3 0.000099910 0.969329 0.000099910 0.011603400 0.000099910 0.000099910
## 4 0.000099919 0.842212 0.000099919 0.000099919 0.023659600 0.030946600
## 5 0.010440600 0.887931 0.026139900 0.000099928 0.007549700 0.010461000
## 6 0.000099883 0.994428 0.000099883 0.000099883 0.000099883 0.000099883
##           v12         v13         v14         v15
## 1 0.042326300 0.034872400 0.000099982 0.022419200
## 2 0.012075400 0.016319500 0.000099982 0.093895300
## 3 0.000909957 0.010347400 0.000099910 0.000099910
## 4 0.000099919 0.000099919 0.077234200 0.007877870
## 5 0.000099928 0.006599350 0.050079000 0.000099928
## 6 0.000099883 0.000099883 0.000099883 0.000099883

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak15 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:15)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_15[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=15.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_15) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=15_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.4 Plots for K=6

color_palette_6 <-
    c(
    "#FF8C1A",
    "red",
    "#1E90FF",
    "#77DD37",
    "#FFFF19",
    "purple3"
)

5.2.4.1 Mean admixture by country for K=6

using ggplot

best = which.min(cross.entropy(project, K = 6)) #5

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 6, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:6)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_6[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=6.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_6)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "LEA_admixture_by_country_euro_global_k6_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

5.2.4.2 Plot individual admixtures for k=6

Extract ancestry coefficients for k=6

change to correct matrix

leak6 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K6/run5/r2_0.01_b_r5.6.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak6)
## # A tibble: 6 × 6
##      X1    X2     X3    X4     X5       X6
##   <dbl> <dbl>  <dbl> <dbl>  <dbl>    <dbl>
## 1 0.174 0.172 0.150  0.350 0.131  0.0241  
## 2 0.205 0.197 0.170  0.293 0.121  0.0131  
## 3 0.160 0.156 0.123  0.400 0.117  0.0444  
## 4 0.199 0.204 0.119  0.389 0.0885 0.000100
## 5 0.180 0.218 0.113  0.404 0.0847 0.000100
## 6 0.203 0.187 0.0976 0.404 0.108  0.000100

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak6 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak6)

head(leak6)
##    ind pop       X1       X2        X3       X4        X5          X6
## 1 1001 OKI 0.173708 0.171848 0.1501010 0.349694 0.1305770 0.024071800
## 2 1002 OKI 0.205148 0.197109 0.1701940 0.292993 0.1214940 0.013061600
## 3 1003 OKI 0.159720 0.156319 0.1226130 0.399926 0.1169840 0.044438900
## 4 1004 OKI 0.198631 0.204137 0.1192340 0.389391 0.0885066 0.000099991
## 5 1005 OKI 0.179729 0.218195 0.1134380 0.403849 0.0846894 0.000099991
## 6 1006 OKI 0.202770 0.187071 0.0975617 0.404205 0.1082920 0.000099991

Rename the columns

# Rename the columns starting from the third one
leak6 <- leak6 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak6)
##    ind pop       v1       v2        v3       v4        v5          v6
## 1 1001 OKI 0.173708 0.171848 0.1501010 0.349694 0.1305770 0.024071800
## 2 1002 OKI 0.205148 0.197109 0.1701940 0.292993 0.1214940 0.013061600
## 3 1003 OKI 0.159720 0.156319 0.1226130 0.399926 0.1169840 0.044438900
## 4 1004 OKI 0.198631 0.204137 0.1192340 0.389391 0.0885066 0.000099991
## 5 1005 OKI 0.179729 0.218195 0.1134380 0.403849 0.0846894 0.000099991
## 6 1006 OKI 0.202770 0.187071 0.0975617 0.404205 0.1082920 0.000099991

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak6 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:6)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_6[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=6.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_6) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=6_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.5 Plots for k=22

color_palette_22 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#77DD77",
    "chocolate4",
    "#B22222",
    "purple",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "#1E90FF",
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "green",
    "#008080",
    "goldenrod3",
    "coral",
    "orangered2"
    )

5.2.5.1 Mean admixture by country for K=22

using ggplot

best = which.min(cross.entropy(project, K = 22)) #5

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 22, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:22)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_22[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_22)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "LEA_admixture_by_country_euro_global_k22_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

Using ggplot2 for individual admixtures

5.2.5.2 Plot individual admixtures for K=22

Extract ancestry coefficients for k=22

change to correct matrix

leak22 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K22/run5/r2_0.01_b_r5.22.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak22)
## # A tibble: 6 × 22
##         X1      X2      X3      X4    X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  1.13e-1 9.99e-5 9.99e-5 9.99e-5 0.494 5.05e-2 9.99e-5 1.79e-3 3.43e-2 5.14e-2
## 2  4.86e-2 1.00e-4 1.03e-2 2.78e-3 0.319 1.03e-3 3.56e-2 8.74e-3 6.26e-2 6.63e-2
## 3  9.98e-5 9.98e-5 9.98e-5 9.98e-5 0.980 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4  9.99e-5 9.99e-5 5.88e-3 3.40e-2 0.817 9.99e-5 1.80e-2 9.99e-5 5.80e-2 2.02e-2
## 5  9.99e-5 9.99e-5 9.99e-5 3.84e-3 0.844 5.71e-3 1.21e-2 9.99e-5 6.06e-2 6.65e-3
## 6  9.98e-5 9.98e-5 9.98e-5 9.98e-5 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 12 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak22 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak22)

head(leak22)
##    ind pop          X1          X2          X3          X4       X5          X6
## 1 1001 OKI 1.12653e-01 9.99280e-05 9.99280e-05 9.99280e-05 0.493779 5.05374e-02
## 2 1002 OKI 4.86145e-02 9.99550e-05 1.02890e-02 2.77542e-03 0.319323 1.02586e-03
## 3 1003 OKI 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 0.979697 9.98380e-05
## 4 1004 OKI 9.98920e-05 9.98920e-05 5.88100e-03 3.40031e-02 0.817342 9.98920e-05
## 5 1005 OKI 9.98830e-05 9.98830e-05 9.98830e-05 3.84252e-03 0.844145 5.71364e-03
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 0.997904 9.98108e-05
##            X7          X8          X9         X10         X11         X12
## 1 9.99280e-05 1.79134e-03 3.42946e-02 5.13805e-02 6.00100e-02 2.66775e-02
## 2 3.56242e-02 8.73696e-03 6.26307e-02 6.63284e-02 1.72237e-02 3.31449e-02
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.80011e-02 9.98920e-05 5.80196e-02 2.01962e-02 2.09973e-02 9.98920e-05
## 5 1.21276e-02 9.98830e-05 6.05534e-02 6.65074e-03 2.46074e-02 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           X13         X14         X15         X16         X17         X18
## 1 9.99280e-05 9.99280e-05 1.24226e-02 5.42137e-02 2.79981e-02 9.00488e-03
## 2 9.99550e-05 9.26243e-02 2.57066e-02 1.43312e-01 7.71757e-02 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 4.15423e-03 9.98380e-05 4.38346e-03
## 4 9.98920e-05 1.39880e-02 9.98920e-05 8.58207e-03 1.79089e-03 9.98920e-05
## 5 9.98830e-05 9.98830e-05 9.98830e-05 2.81391e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           X19         X20         X21         X22
## 1 3.64674e-02 2.79699e-02 9.99280e-05 9.99280e-05
## 2 3.60504e-02 1.89146e-02 9.99550e-05 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.96870e-03 9.98380e-05
## 4 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 5 9.98830e-05 1.29216e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05

Rename the columns

# Rename the columns starting from the third one
leak22 <- leak22 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak22)
##    ind pop          v1          v2          v3          v4       v5          v6
## 1 1001 OKI 1.12653e-01 9.99280e-05 9.99280e-05 9.99280e-05 0.493779 5.05374e-02
## 2 1002 OKI 4.86145e-02 9.99550e-05 1.02890e-02 2.77542e-03 0.319323 1.02586e-03
## 3 1003 OKI 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 0.979697 9.98380e-05
## 4 1004 OKI 9.98920e-05 9.98920e-05 5.88100e-03 3.40031e-02 0.817342 9.98920e-05
## 5 1005 OKI 9.98830e-05 9.98830e-05 9.98830e-05 3.84252e-03 0.844145 5.71364e-03
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 0.997904 9.98108e-05
##            v7          v8          v9         v10         v11         v12
## 1 9.99280e-05 1.79134e-03 3.42946e-02 5.13805e-02 6.00100e-02 2.66775e-02
## 2 3.56242e-02 8.73696e-03 6.26307e-02 6.63284e-02 1.72237e-02 3.31449e-02
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.80011e-02 9.98920e-05 5.80196e-02 2.01962e-02 2.09973e-02 9.98920e-05
## 5 1.21276e-02 9.98830e-05 6.05534e-02 6.65074e-03 2.46074e-02 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           v13         v14         v15         v16         v17         v18
## 1 9.99280e-05 9.99280e-05 1.24226e-02 5.42137e-02 2.79981e-02 9.00488e-03
## 2 9.99550e-05 9.26243e-02 2.57066e-02 1.43312e-01 7.71757e-02 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 4.15423e-03 9.98380e-05 4.38346e-03
## 4 9.98920e-05 1.39880e-02 9.98920e-05 8.58207e-03 1.79089e-03 9.98920e-05
## 5 9.98830e-05 9.98830e-05 9.98830e-05 2.81391e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
##           v19         v20         v21         v22
## 1 3.64674e-02 2.79699e-02 9.99280e-05 9.99280e-05
## 2 3.60504e-02 1.89146e-02 9.99550e-05 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.96870e-03 9.98380e-05
## 4 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 5 9.98830e-05 1.29216e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak22 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:22)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_22[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=22.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_22) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=22_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.6 Plots for K=23

ce = cross.entropy(project, K = 23)
ce 
##          K = 23
## run 1 0.8674120
## run 2 0.8675108
## run 3 0.8679872
## run 4 0.8671792
## run 5 0.8675925
color_palette_23 <-
  c(
    "purple4",
    "purple", 
    "orangered",
    "#FF8C1A",
    "#F49AC2",
    "magenta",
    "#AE9393",
    "#FFFF99",
    "orchid1",
    "#1E90FF",
    "chocolate4",
    "#B20CC9",
    "goldenrod",
    "#77DD77",
    "blue",
    "navy", 
    "green",
    "green4", 
    "#B22222",
    "#008080", 
    "coral",
    "yellow2",
    "#75FAFF"
      )

5.2.6.1 Mean admixture by country for K=23

using ggplot

best = which.min(cross.entropy(project, K = 23)) 

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 23, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:23)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_23[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_23)

5.2.6.2 Plot individual admixtures for K=23

Extract ancestry coefficients for k=23

change to correct matrix

leak23 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K23/run4/r2_0.01_b_r4.23.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak23)
## # A tibble: 6 × 23
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.0000999 0.172     0.0000999  3.30e-2 2.70e-2 9.99e-5 9.99e-5 4.42e-2 2.55e-2
## 2 0.0000999 0.175     0.0000999  1.81e-2 5.98e-2 3.26e-2 9.99e-5 9.96e-2 2.33e-2
## 3 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 5.34e-3
## 4 0.0000998 0.998     0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 5 0.0000998 0.998     0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 6 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 14 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>,
## #   X21 <dbl>, X22 <dbl>, X23 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak23 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak23)

head(leak23)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 9.99190e-05 1.72014e-01 9.99190e-05 3.29890e-02 2.69674e-02
## 2 1002 OKI 9.99460e-05 1.75082e-01 9.99460e-05 1.81437e-02 5.98223e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.98021e-05 9.97804e-01 9.98021e-05 9.98021e-05 9.98021e-05
## 5 1005 OKI 9.98018e-05 9.97804e-01 9.98018e-05 9.98018e-05 9.98018e-05
## 6 1006 OKI 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
##            X6          X7          X8          X9         X10         X11
## 1 9.99190e-05 9.99190e-05 4.42102e-02 2.55167e-02 9.99190e-05 9.99190e-05
## 2 3.26238e-02 9.99460e-05 9.96464e-02 2.32976e-02 1.37268e-02 2.23341e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 5.34090e-03 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
##           X12         X13         X14         X15         X16         X17
## 1 3.14792e-02 6.89065e-02 3.26504e-02 9.99190e-05 9.99190e-05 1.17634e-01
## 2 2.32446e-03 7.27989e-03 7.33037e-02 2.86953e-02 9.99460e-05 1.57901e-01
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 1.08933e-02
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
##           X18         X19         X20         X21         X22         X23
## 1 2.25024e-02 9.99190e-05 1.01914e-01 2.37851e-01 5.57836e-02 2.86824e-02
## 2 3.26230e-02 9.99460e-05 7.49364e-02 1.37089e-01 4.05708e-02 9.99460e-05
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.81769e-01 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.97804e-01 9.98016e-05 9.98016e-05

Rename the columns

# Rename the columns starting from the third one
leak23 <- leak23 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak23)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 9.99190e-05 1.72014e-01 9.99190e-05 3.29890e-02 2.69674e-02
## 2 1002 OKI 9.99460e-05 1.75082e-01 9.99460e-05 1.81437e-02 5.98223e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.98021e-05 9.97804e-01 9.98021e-05 9.98021e-05 9.98021e-05
## 5 1005 OKI 9.98018e-05 9.97804e-01 9.98018e-05 9.98018e-05 9.98018e-05
## 6 1006 OKI 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
##            v6          v7          v8          v9         v10         v11
## 1 9.99190e-05 9.99190e-05 4.42102e-02 2.55167e-02 9.99190e-05 9.99190e-05
## 2 3.26238e-02 9.99460e-05 9.96464e-02 2.32976e-02 1.37268e-02 2.23341e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 5.34090e-03 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
##           v12         v13         v14         v15         v16         v17
## 1 3.14792e-02 6.89065e-02 3.26504e-02 9.99190e-05 9.99190e-05 1.17634e-01
## 2 2.32446e-03 7.27989e-03 7.33037e-02 2.86953e-02 9.99460e-05 1.57901e-01
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 1.08933e-02
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
##           v18         v19         v20         v21         v22         v23
## 1 2.25024e-02 9.99190e-05 1.01914e-01 2.37851e-01 5.57836e-02 2.86824e-02
## 2 3.26230e-02 9.99460e-05 7.49364e-02 1.37089e-01 4.05708e-02 9.99460e-05
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.81769e-01 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.97804e-01 9.98016e-05 9.98016e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak23 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:23)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_23[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=23.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_23) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=23_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.7 Plots for K=21

ce = cross.entropy(project, K = 21)
ce 
##          K = 21
## run 1 0.8678011
## run 2 0.8674450
## run 3 0.8686613
## run 4 0.8677728
## run 5 0.8673978
color_palette_21 <-
  c(
    "#FF8C1A",
    "yellow2",
    "#77DD77",
    "chocolate4",
    "#B22222",
    "purple",
    "#B20CC9",
    "#F49AC2",
    "blue",
    "#1E90FF",
    "purple4",
    "#FFFF99",
    "#75FAFF",
    "#AE9393",
    "magenta",
    "green4",
    "navy", 
    "green",
    "#008080",
    "goldenrod3",
    "orangered2"
    )

5.2.7.1 Mean admixture by country for K=21

using ggplot

best = which.min(cross.entropy(project, K = 21)) 

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 21, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:21)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_21[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_21)

5.2.7.2 Plot individual admixtures for K=21

Extract ancestry coefficients for k=21

change to correct matrix

leak21 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K21/run5/r2_0.01_b_r5.21.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak21)
## # A tibble: 6 × 21
##          X1        X2        X3       X4      X5      X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.0275    0.0000999 0.0000999  4.04e-3 9.99e-5 1.79e-2 1.46e-3 3.42e-2 8.64e-2
## 2 0.0148    0.0115    0.0454     1.00e-4 1.00e-4 3.47e-4 5.23e-3 1.04e-2 2.87e-2
## 3 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 0.0000999 0.0000999 0.0000999  9.99e-5 5.75e-2 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 5 0.00210   0.0000999 0.0000999  9.99e-5 3.13e-2 9.99e-5 9.99e-5 5.84e-3 9.99e-5
## 6 0.0000998 0.0000998 0.0000998  9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 12 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## #   X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak21 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak21)

head(leak21)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 2.75462e-02 9.99460e-05 9.99460e-05 4.04442e-03 9.99460e-05
## 2 1002 OKI 1.48332e-02 1.14721e-02 4.54085e-02 9.99640e-05 9.99640e-05
## 3 1003 OKI 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 1004 OKI 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 5.74840e-02
## 5 1005 OKI 2.09787e-03 9.98830e-05 9.98830e-05 9.98830e-05 3.12814e-02
## 6 1006 OKI 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
##            X6          X7          X8          X9         X10         X11
## 1 1.79090e-02 1.46433e-03 3.41894e-02 8.64120e-02 2.89535e-02 7.82764e-02
## 2 3.47153e-04 5.23083e-03 1.04371e-02 2.87482e-02 9.99640e-05 1.11851e-01
## 3 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 1.10122e-03 7.62143e-04
## 5 9.98830e-05 9.98830e-05 5.84309e-03 9.98830e-05 9.98830e-05 1.28451e-02
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
##           X12      X13         X14         X15         X16         X17
## 1 9.99460e-05 0.478998 2.65349e-02 4.93407e-02 1.85118e-02 9.99460e-05
## 2 5.74222e-02 0.335978 1.95733e-02 6.52214e-02 6.26335e-02 2.71121e-04
## 3 9.98381e-05 0.975063 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 4.50163e-03 0.860375 9.98941e-05 9.98941e-05 9.98941e-05 8.72065e-03
## 5 9.60980e-03 0.890601 9.98830e-05 9.98830e-05 6.48132e-03 9.98830e-05
## 6 9.98199e-05 0.998004 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
##           X18         X19         X20         X21
## 1 6.87504e-02 1.70532e-02 6.14159e-02 9.99460e-05
## 2 6.91112e-02 1.13466e-01 4.75945e-02 9.99640e-05
## 3 9.98381e-05 3.59147e-03 9.98381e-05 1.95484e-02
## 4 9.98941e-05 5.07547e-03 5.91446e-02 1.63704e-03
## 5 9.98830e-05 9.98830e-05 3.99420e-02 9.98830e-05
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05

Rename the columns

# Rename the columns starting from the third one
leak21 <- leak21 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak21)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 2.75462e-02 9.99460e-05 9.99460e-05 4.04442e-03 9.99460e-05
## 2 1002 OKI 1.48332e-02 1.14721e-02 4.54085e-02 9.99640e-05 9.99640e-05
## 3 1003 OKI 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 1004 OKI 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 5.74840e-02
## 5 1005 OKI 2.09787e-03 9.98830e-05 9.98830e-05 9.98830e-05 3.12814e-02
## 6 1006 OKI 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
##            v6          v7          v8          v9         v10         v11
## 1 1.79090e-02 1.46433e-03 3.41894e-02 8.64120e-02 2.89535e-02 7.82764e-02
## 2 3.47153e-04 5.23083e-03 1.04371e-02 2.87482e-02 9.99640e-05 1.11851e-01
## 3 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 1.10122e-03 7.62143e-04
## 5 9.98830e-05 9.98830e-05 5.84309e-03 9.98830e-05 9.98830e-05 1.28451e-02
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
##           v12      v13         v14         v15         v16         v17
## 1 9.99460e-05 0.478998 2.65349e-02 4.93407e-02 1.85118e-02 9.99460e-05
## 2 5.74222e-02 0.335978 1.95733e-02 6.52214e-02 6.26335e-02 2.71121e-04
## 3 9.98381e-05 0.975063 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 4.50163e-03 0.860375 9.98941e-05 9.98941e-05 9.98941e-05 8.72065e-03
## 5 9.60980e-03 0.890601 9.98830e-05 9.98830e-05 6.48132e-03 9.98830e-05
## 6 9.98199e-05 0.998004 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
##           v18         v19         v20         v21
## 1 6.87504e-02 1.70532e-02 6.14159e-02 9.99460e-05
## 2 6.91112e-02 1.13466e-01 4.75945e-02 9.99640e-05
## 3 9.98381e-05 3.59147e-03 9.98381e-05 1.95484e-02
## 4 9.98941e-05 5.07547e-03 5.91446e-02 1.63704e-03
## 5 9.98830e-05 9.98830e-05 3.99420e-02 9.98830e-05
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak21 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:21)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_21[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=21.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_21) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=21_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.8 Plots for K=7

ce = cross.entropy(project, K = 7)
ce 
##           K = 7
## run 1 0.8793121
## run 2 0.8800735
## run 3 0.8802178
## run 4 0.8798409
## run 5 0.8793035
color_palette_7 <-
  c(
    "red",
    "#77DD37",  
    "#FF8C1A",    
    "#FFFF19",
    "#75FAFF", 
    "#1E90FF",   
    "purple3")

5.2.8.1 Mean admixture by country for K=7

using ggplot

best = which.min(cross.entropy(project, K = 7)) 

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:7)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_7)

5.2.8.2 Plot individual admixtures for K=7

Extract ancestry coefficients for k=7

change to correct matrix

leak7 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K7/run5/r2_0.01_b_r5.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak7)
## # A tibble: 6 × 7
##         X1       X2     X3      X4     X5     X6    X7
##      <dbl>    <dbl>  <dbl>   <dbl>  <dbl>  <dbl> <dbl>
## 1 0.0756   0.0293   0.0751 0.0622  0.0574 0.0800 0.620
## 2 0.0512   0.0620   0.0632 0.0807  0.0627 0.125  0.556
## 3 0.000100 0.000100 0.0497 0.0150  0.0934 0.0266 0.815
## 4 0.0159   0.00257  0.0664 0.0291  0.0366 0.0927 0.757
## 5 0.000100 0.000100 0.0483 0.0153  0.0423 0.0944 0.800
## 6 0.000100 0.000100 0.0239 0.00847 0.0909 0.0414 0.835

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop          X1          X2        X3         X4        X5        X6
## 1 1001 OKI 0.075587600 0.029332800 0.0750965 0.06223380 0.0574401 0.0799922
## 2 1002 OKI 0.051196200 0.062006400 0.0631862 0.08066190 0.0626840 0.1246690
## 3 1003 OKI 0.000099982 0.000099982 0.0496697 0.01503730 0.0933872 0.0266181
## 4 1004 OKI 0.015876700 0.002571750 0.0663977 0.02908000 0.0366296 0.0927472
## 5 1005 OKI 0.000099982 0.000099982 0.0482861 0.01532970 0.0423157 0.0943542
## 6 1006 OKI 0.000099982 0.000099982 0.0239108 0.00847465 0.0909052 0.0413913
##         X7
## 1 0.620317
## 2 0.555597
## 3 0.815088
## 4 0.756697
## 5 0.799514
## 6 0.835118

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop          v1          v2        v3         v4        v5        v6
## 1 1001 OKI 0.075587600 0.029332800 0.0750965 0.06223380 0.0574401 0.0799922
## 2 1002 OKI 0.051196200 0.062006400 0.0631862 0.08066190 0.0626840 0.1246690
## 3 1003 OKI 0.000099982 0.000099982 0.0496697 0.01503730 0.0933872 0.0266181
## 4 1004 OKI 0.015876700 0.002571750 0.0663977 0.02908000 0.0366296 0.0927472
## 5 1005 OKI 0.000099982 0.000099982 0.0482861 0.01532970 0.0423157 0.0943542
## 6 1006 OKI 0.000099982 0.000099982 0.0239108 0.00847465 0.0909052 0.0413913
##         v7
## 1 0.620317
## 2 0.555597
## 3 0.815088
## 4 0.756697
## 5 0.799514
## 6 0.835118

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:7)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_7) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=7_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.9 Plots for K=18

ce = cross.entropy(project, K = 18)
ce 
##          K = 18
## run 1 0.8678589
## run 2 0.8684983
## run 3 0.8697050
## run 4 0.8691746
## run 5 0.8682401
color_palette_18 <-
  c(
    "#77DD77",
    "yellow2",
    "navy", 
    "#FFFF99",
    "#AE9393",
    "orangered2", 
    "#FF8C1A",
    "chocolate4",
    "#B22222",
    "magenta",    
    "purple4", 
    "#1E90FF",
    "purple",
    "#008080",
    "green",
    "#75FAFF",
    "blue",
    "#F49AC2"
    )

5.2.9.1 Mean admixture by country for K=18

using ggplot

best = which.min(cross.entropy(project, K = 18)) 

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 18, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:18)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_18[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_18)

5.2.9.2 Plot individual admixtures for K=18

Extract ancestry coefficients for k=18

change to correct matrix

leak18 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K18/run5/r2_0.01_b_r5.18.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak18)
## # A tibble: 6 × 18
##         X1    X2      X3      X4      X5      X6      X7      X8      X9     X10
##      <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  3.07e-2 0.508 2.43e-2 1.00e-4 7.64e-3 1.97e-3 8.55e-3 1.03e-1 3.57e-2 8.77e-2
## 2  1.00e-4 0.320 6.73e-2 1.00e-4 1.56e-2 1.00e-4 1.00e-4 2.00e-2 9.73e-2 1.59e-1
## 3  7.75e-3 0.976 9.99e-5 7.81e-3 2.44e-3 9.99e-5 2.13e-3 9.99e-5 9.99e-5 9.99e-5
## 4  9.99e-5 0.854 1.58e-2 9.99e-5 9.99e-5 9.99e-5 9.99e-5 1.08e-2 4.80e-3 4.44e-2
## 5  2.30e-4 0.898 1.23e-2 9.99e-5 9.99e-5 2.88e-3 9.99e-5 9.99e-5 9.99e-5 2.98e-2
## 6  9.98e-5 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 8 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>, X17 <dbl>, X18 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak18 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak18)

head(leak18)
##    ind pop          X1       X2          X3          X4          X5          X6
## 1 1001 OKI 3.06744e-02 0.508398 2.42502e-02 9.99550e-05 7.63745e-03 1.96511e-03
## 2 1002 OKI 9.99550e-05 0.320112 6.72566e-02 9.99550e-05 1.56024e-02 9.99550e-05
## 3 1003 OKI 7.75016e-03 0.975955 9.98920e-05 7.80961e-03 2.44196e-03 9.98920e-05
## 4 1004 OKI 9.99280e-05 0.853619 1.57864e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1005 OKI 2.29525e-04 0.898306 1.23362e-02 9.99280e-05 9.99280e-05 2.87996e-03
## 6 1006 OKI 9.98468e-05 0.998303 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
##            X7          X8          X9         X10         X11         X12
## 1 8.54832e-03 1.03097e-01 3.56559e-02 8.76786e-02 9.99550e-05 6.91777e-02
## 2 9.99550e-05 1.99509e-02 9.73101e-02 1.59343e-01 2.04739e-02 3.14025e-02
## 3 2.12764e-03 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 4 9.99280e-05 1.07794e-02 4.80383e-03 4.43832e-02 1.58909e-02 9.99280e-05
## 5 9.99280e-05 9.99280e-05 9.99280e-05 2.97674e-02 2.18784e-02 2.08629e-03
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
##           X13         X14         X15         X16         X17         X18
## 1 3.88692e-02 9.99550e-05 9.99550e-05 9.99550e-05 5.42348e-02 2.93138e-02
## 2 4.06099e-02 9.99550e-05 3.87547e-02 1.12092e-01 1.60328e-02 6.05588e-02
## 3 9.98920e-05 9.98920e-05 9.98920e-05 2.71680e-03 9.98920e-05 9.98920e-05
## 4 9.99280e-05 8.33977e-03 2.62039e-02 9.99280e-05 1.30858e-02 6.30877e-03
## 5 9.99280e-05 9.99280e-05 1.43956e-02 9.99280e-05 3.06607e-03 1.42552e-02
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05

Rename the columns

# Rename the columns starting from the third one
leak18 <- leak18 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak18)
##    ind pop          v1       v2          v3          v4          v5          v6
## 1 1001 OKI 3.06744e-02 0.508398 2.42502e-02 9.99550e-05 7.63745e-03 1.96511e-03
## 2 1002 OKI 9.99550e-05 0.320112 6.72566e-02 9.99550e-05 1.56024e-02 9.99550e-05
## 3 1003 OKI 7.75016e-03 0.975955 9.98920e-05 7.80961e-03 2.44196e-03 9.98920e-05
## 4 1004 OKI 9.99280e-05 0.853619 1.57864e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1005 OKI 2.29525e-04 0.898306 1.23362e-02 9.99280e-05 9.99280e-05 2.87996e-03
## 6 1006 OKI 9.98468e-05 0.998303 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
##            v7          v8          v9         v10         v11         v12
## 1 8.54832e-03 1.03097e-01 3.56559e-02 8.76786e-02 9.99550e-05 6.91777e-02
## 2 9.99550e-05 1.99509e-02 9.73101e-02 1.59343e-01 2.04739e-02 3.14025e-02
## 3 2.12764e-03 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 4 9.99280e-05 1.07794e-02 4.80383e-03 4.43832e-02 1.58909e-02 9.99280e-05
## 5 9.99280e-05 9.99280e-05 9.99280e-05 2.97674e-02 2.18784e-02 2.08629e-03
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
##           v13         v14         v15         v16         v17         v18
## 1 3.88692e-02 9.99550e-05 9.99550e-05 9.99550e-05 5.42348e-02 2.93138e-02
## 2 4.06099e-02 9.99550e-05 3.87547e-02 1.12092e-01 1.60328e-02 6.05588e-02
## 3 9.98920e-05 9.98920e-05 9.98920e-05 2.71680e-03 9.98920e-05 9.98920e-05
## 4 9.99280e-05 8.33977e-03 2.62039e-02 9.99280e-05 1.30858e-02 6.30877e-03
## 5 9.99280e-05 9.99280e-05 1.43956e-02 9.99280e-05 3.06607e-03 1.42552e-02
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak18 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:18)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_18[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=18.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_18) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=18_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

5.2.10 Plots for K=19

ce = cross.entropy(project, K = 19)
ce 
##          K = 19
## run 1 0.8682022
## run 2 0.8680049
## run 3 0.8682069
## run 4 0.8680339
## run 5 0.8677646
color_palette_19 <-
  c(
    "#77DD77",
    "yellow2",
    "navy", 
    "#FFFF99",
    "#AE9393",
    "orangered2", 
    "#FF8C1A",
    "chocolate4",
    "#B22222",
    "magenta",    
    "purple4", 
    "#1E90FF",
    "purple",
    "#008080",
    "green",
    "#75FAFF",
    "blue",
    "#F49AC2",
    "#B20CC9"
    )

5.2.10.1 Mean admixture by country for k=19

best = which.min(cross.entropy(project, K = 19)) 

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 19, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:19)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_19[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_19)

5.2.10.2 Plot individual admixtures for k=19

Extract ancestry coefficients for k=19

change to correct matrix

leak19 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K19/run5/r2_0.01_b_r5.19.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak19)
## # A tibble: 6 × 19
##         X1      X2    X3      X4      X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  1.64e-2 1.80e-2 0.496 1.00e-4 1.00e-4 7.31e-3 1.00e-4 1.02e-1 4.13e-2 6.34e-2
## 2  9.52e-2 2.62e-2 0.323 4.52e-2 1.95e-2 8.76e-2 1.00e-4 1.76e-1 1.00e-4 9.01e-3
## 3  9.99e-5 9.99e-3 0.982 9.99e-5 9.99e-5 1.39e-3 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4  7.49e-3 9.99e-5 0.833 7.65e-3 9.99e-5 9.99e-5 1.45e-2 3.37e-2 4.38e-2 2.15e-3
## 5  2.12e-2 9.99e-5 0.864 1.51e-2 4.03e-3 9.99e-5 9.99e-5 2.76e-2 1.20e-2 1.07e-2
## 6  9.98e-5 9.98e-5 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 9 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>

The fam file

fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak19 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak19)

head(leak19)
##    ind pop          X1          X2       X3          X4          X5          X6
## 1 1001 OKI 1.63748e-02 1.80108e-02 0.496327 9.99640e-05 9.99640e-05 7.31110e-03
## 2 1002 OKI 9.51566e-02 2.62419e-02 0.323274 4.51663e-02 1.95366e-02 8.76055e-02
## 3 1003 OKI 9.98650e-05 9.98848e-03 0.981676 9.98650e-05 9.98650e-05 1.38856e-03
## 4 1004 OKI 7.49281e-03 9.99010e-05 0.833186 7.65312e-03 9.99010e-05 9.99010e-05
## 5 1005 OKI 2.12344e-02 9.99190e-05 0.863652 1.51044e-02 4.02701e-03 9.99190e-05
## 6 1006 OKI 9.98378e-05 9.98378e-05 0.998203 9.98378e-05 9.98378e-05 9.98378e-05
##            X7          X8          X9         X10         X11         X12
## 1 9.99640e-05 1.02275e-01 4.13298e-02 6.33793e-02 2.61333e-02 4.30549e-03
## 2 9.99550e-05 1.75763e-01 9.99550e-05 9.00764e-03 8.99474e-03 9.99550e-05
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 1.44714e-02 3.37349e-02 4.38463e-02 2.14673e-03 9.99010e-05 9.99010e-05
## 5 9.99190e-05 2.75958e-02 1.20219e-02 1.06768e-02 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
##           X13         X14         X15         X16         X17         X18
## 1 9.99640e-05 8.17507e-02 4.37441e-02 9.30773e-03 1.32076e-02 6.30280e-02
## 2 6.60810e-03 5.91668e-02 3.68035e-02 9.99550e-05 4.74601e-02 5.87159e-02
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 5.63694e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 3.58518e-02 5.75777e-03 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
##           X19
## 1 1.31156e-02
## 2 9.99550e-05
## 3 5.44933e-03
## 4 9.99010e-05
## 5 3.17852e-03
## 6 9.98378e-05

Rename the columns

# Rename the columns starting from the third one
leak19 <- leak19 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak19)
##    ind pop          v1          v2       v3          v4          v5          v6
## 1 1001 OKI 1.63748e-02 1.80108e-02 0.496327 9.99640e-05 9.99640e-05 7.31110e-03
## 2 1002 OKI 9.51566e-02 2.62419e-02 0.323274 4.51663e-02 1.95366e-02 8.76055e-02
## 3 1003 OKI 9.98650e-05 9.98848e-03 0.981676 9.98650e-05 9.98650e-05 1.38856e-03
## 4 1004 OKI 7.49281e-03 9.99010e-05 0.833186 7.65312e-03 9.99010e-05 9.99010e-05
## 5 1005 OKI 2.12344e-02 9.99190e-05 0.863652 1.51044e-02 4.02701e-03 9.99190e-05
## 6 1006 OKI 9.98378e-05 9.98378e-05 0.998203 9.98378e-05 9.98378e-05 9.98378e-05
##            v7          v8          v9         v10         v11         v12
## 1 9.99640e-05 1.02275e-01 4.13298e-02 6.33793e-02 2.61333e-02 4.30549e-03
## 2 9.99550e-05 1.75763e-01 9.99550e-05 9.00764e-03 8.99474e-03 9.99550e-05
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 1.44714e-02 3.37349e-02 4.38463e-02 2.14673e-03 9.99010e-05 9.99010e-05
## 5 9.99190e-05 2.75958e-02 1.20219e-02 1.06768e-02 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
##           v13         v14         v15         v16         v17         v18
## 1 9.99640e-05 8.17507e-02 4.37441e-02 9.30773e-03 1.32076e-02 6.30280e-02
## 2 6.60810e-03 5.91668e-02 3.68035e-02 9.99550e-05 4.74601e-02 5.87159e-02
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 5.63694e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 3.58518e-02 5.75777e-03 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
##           v19
## 1 1.31156e-02
## 2 9.99550e-05
## 3 5.44933e-03
## 4 9.99010e-05
## 5 3.17852e-03
## 6 9.98378e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak19 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:19)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_19[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=19.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_19) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "lea_k=19_euro_global_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

6. PCAs for subsets of populations

6.1 PCA for Italy + native pops for SNP Set 3 (MAF 1%, R2<0.01)

6.1.1 Import the data for SNP Set 3 subset for euro_native_italy_all

genotype <- here(
   "euro_global/output/neuroadmixture/euro_native2_italy_all.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 339
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 339
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG 
##  12  13  12  14  12  16   2  12   4   7  12   4  11   4   5   9  12   2   6  12 
## KAN KAT KLP KUN LAM MAT OKI QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN 
##  11   6   4   4   9  12  12  11   4   9   3  12   6   6   7  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   330
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   330
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
## 
## 
##  - number of detected individuals:   330
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.lfmm"

PCA for MAF 1% r2<0.01 snp set of euro_native_italy_all

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          330
##         -L (number of loci)                 22537
##         -K (number of principal components) 330
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            euro_native2_italy_all.pca/ 
## input file:                      euro_native2_italy_all.lfmm 
## eigenvalue file:                 euro_native2_italy_all.eigenvalues 
## eigenvector file:                euro_native2_italy_all.eigenvectors 
## standard deviation file:         euro_native2_italy_all.sdev 
## projection file:                 euro_native2_italy_all.projections 
## pcaProject file:                   euro_native2_italy_all.pcaProject 
## number of individuals:           330 
## number of loci:                  22537 
## number of principal components:  330 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          330
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Check R symbols for plot

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
  scale_shape_manual(values=good.shapes[1:N]) +
  geom_point()

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native.rds"))
head(sampling_loc)
##    Pop_City  Location Latitude Longitude Continent Abbreviation Year Region
## 1   Brescia   Brescia 45.53373 10.204450    Europe          BRE 1995  Italy
## 2    Cesena    Cesena 44.15287 12.244265    Europe          CES 1995  Italy
## 3 Desenzano Desenzano 45.46289 10.549140    Europe          DES 1995  Italy
## 4   Bologna   Bologna 44.48478 11.366584    Europe          ITB 2017  Italy
## 5   Imperia   Imperia 43.87159  8.003559    Europe          IMP 2017  Italy
## 6    Puglia    Puglia 41.12213 16.844107    Europe          ITP 2016  Italy
##     Subregion order order2 orderold
## 1 West Europe    20     12       12
## 2 West Europe    24     16       16
## 3 West Europe    21     13       13
## 4 West Europe    23     15       15
## 5 West Europe    18     10       10
## 6 West Europe    28     20       20

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 38 Levels: BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 38

Check the regions

unique(sampling_loc$Region)
## [1] "Italy"          "East Asia"      "South Asia"     "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population      PC1      PC2      PC3      PC4       PC5         PC6     PC7
## 1        BEN -36.4286 -8.65616  9.67835 1.706060  0.242176  0.00620311 3.35510
## 2        BEN -35.6578 -7.99592  9.14433 3.316050 -2.987790 -0.63408700 4.14891
## 3        BEN -37.2921 -8.22387  8.54982 1.835040 -1.936330 -0.42146200 3.54288
## 4        BEN -35.6589 -8.20466  9.41346 2.407870 -1.585510  0.07330430 3.14577
## 5        BEN -36.3292 -8.46492 10.47310 0.864815  0.747834 -1.23031000 3.35605
## 6        BEN -35.8840 -8.60298 10.39800 1.301580 -2.034310 -0.08303760 2.72750
##        PC8      PC9     PC10     PC11     PC12     PC13    PC14       PC15
## 1 -3.01092 -8.93709 1.257590 -4.37216  9.57095 1.807150 3.99259 -2.8512300
## 2 -4.05844 -6.56452 3.101820 -6.56300  9.34943 1.495770 1.91960 -0.0789511
## 3 -2.41577 -8.58320 1.848370 -6.13380 10.95160 2.024820 3.09653 -0.2232520
## 4 -2.41373 -7.71873 3.074200 -3.69073  9.34427 2.835000 2.65460 -1.6481300
## 5 -3.19459 -7.71933 3.112940 -3.06045 11.42760 0.341763 4.35146 -1.2772800
## 6 -3.45130 -9.82680 0.593669 -5.16226 10.53380 1.694970 3.81288 -2.9277000
##         PC16      PC17      PC18       PC19      PC20      PC21      PC22
## 1  1.4269200  0.444874 -2.605780 -0.1387210 -0.548535  0.446162  0.679672
## 2  2.6657600 -0.459167 -0.675325  1.7449300 -0.219636 -0.583923  0.478380
## 3 -1.0003300  1.323620 -0.496820  0.4805230 -1.521700 -0.898755  1.032990
## 4 -0.0257458 -0.540294 -1.702020 -0.2572390 -1.113980 -0.932667 -1.856180
## 5  2.2095900  0.186720  0.709656 -1.4002400 -2.768490  2.423650  0.924281
## 6  1.5628700  0.690478 -1.002700  0.0920083 -3.523690 -3.039580  5.411600
##       PC23      PC24     PC25     PC26       PC27     PC28    PC29     PC30
## 1  1.20414 -3.448410  8.67336 10.20080  3.5956500 -2.87586 1.13300 5.674920
## 2  1.63656 -1.828880  6.22152  6.88458  0.9441300 -5.16572 3.27890 4.441010
## 3 -1.27430 -2.677440  4.36255  5.60435  1.0782800 -7.61513 1.98766 1.732170
## 4 -1.35891 -2.573890  4.74408  7.11259 -0.0799014 -2.26633 2.12922 2.664020
## 5  0.92488  0.106894  5.41177  7.92296  1.1685500 -1.75458 2.26918 0.815352
## 6 -3.99058 -6.468870 10.78720 11.60920  1.0326100 -8.80816 6.29450 6.784490
##          PC31      PC32      PC33      PC34     PC35     PC36     PC37
## 1 -1.35344000  0.507352  -8.41585  0.829387 12.26600 11.85820  4.74150
## 2 -0.00149523  0.226129  -9.49899  1.133870  7.95940  2.20301  1.24861
## 3 -3.30131000 -0.940338  -5.27423  0.421175  4.99281  2.88495  5.57283
## 4 -0.17599900  1.379430  -5.63710  2.246720  3.13143  6.34839  4.92737
## 5 -0.13300000  0.541537  -2.29314  0.668216  6.68419  5.72174  6.01297
## 6 -5.08747000 -0.171277 -23.00240 -3.675780 16.02970 12.80410 16.76260
##        PC38     PC39      PC40     PC41     PC42        PC43      PC44
## 1 -9.306760  2.76689  -5.93442 -6.06762  6.25710 -3.59349000 -5.234970
## 2  0.942282  1.15206  -1.70704 -3.64219  4.40032 -4.88358000 -0.983047
## 3 -2.730500  1.48026  -2.75150 -3.23859  7.02737 -0.00871823  1.792620
## 4 -2.748640  4.86364  -3.10931 -3.63340  4.08623 -0.34372400  1.801030
## 5 -2.573450 -2.40112  -1.12456 -5.43754  4.60885 -2.15457000 -0.110867
## 6 -1.843590  3.79675 -16.48130 -4.03469 18.38850  3.29094000 12.381400
##        PC45      PC46     PC47      PC48     PC49     PC50      PC51     PC52
## 1  4.988650 -4.982460 -1.85080 -1.710760  3.93953 -1.71044 -0.485108 -5.07246
## 2  3.155360  1.187990  1.96649 -2.717490  3.74892 -3.07740  3.763990  1.96752
## 3  4.232960 -0.399819  2.01636 -3.436120 -2.23064  4.40633  3.653800 -1.67868
## 4  4.671640  1.507510  2.25192 -1.914680  1.59632  3.10852  1.710710 -2.80248
## 5  0.368042 -3.879630  1.59489  0.537028  3.50036 -1.82312  3.326420 -1.81966
## 6 14.391200  9.710800  1.61274 -9.103410  2.49952 -5.82170  3.909670 -6.41516
##        PC53      PC54       PC55     PC56      PC57       PC58     PC59
## 1 -3.498460 -0.591602 -3.4994500 -4.82154 -1.810640 -17.726900 -9.26768
## 2 -0.433642 -1.930470 -0.6457280 -2.88981  0.837444  -4.499700 -2.36933
## 3  1.006650  1.890690 -2.8024000  1.05884  1.743710  -3.537010 -3.22576
## 4 -1.712400  0.347678 -2.8923900 -1.44836  9.173920  -3.389600 -2.25018
## 5  1.983130 -1.238630  0.0951803 -2.85376 -0.779215  -0.808632  2.27708
## 6 -5.677470  5.034360  5.4366300  3.93041  3.065970  18.928700  7.49059
##        PC60     PC61      PC62      PC63      PC64     PC65      PC66     PC67
## 1 -20.61790 12.84320  14.67310 -3.457630 -10.14490 -1.50620  0.232364  4.12503
## 2  -7.87076  5.24957   7.11435 -2.593260  -3.41792  1.14602  1.396230 -2.10571
## 3  -5.54479  3.61423   7.51875  4.003790  -1.75662  3.00994  5.822280 -4.10763
## 4  -8.13063 -0.47465   5.59817 -1.154140  -7.53385  1.28329  3.698950 -3.89698
## 5  -6.65000  7.30773  11.04180 -1.247250  -1.33184 -3.02103 -1.397850 -3.99500
## 6  35.27880 -8.97710 -21.74100 -0.392993  16.62620 -6.04496 -1.847560  4.15223
##        PC68      PC69      PC70      PC71      PC72      PC73     PC74
## 1  1.536300 -13.48370  3.477350  3.720790 -5.916960 -1.536060 21.41410
## 2  0.410133   4.71199 -3.481590  4.972220 -1.976500  1.742720 -4.75006
## 3  4.377580   1.44084  0.462429 -4.110600  3.893530  0.941228 -7.14890
## 4 -3.201360  -2.81169  1.883300 -4.296660  8.613790 -4.352930 -8.34767
## 5 -8.322840   3.09596 -4.858230  0.499918 -6.630700 -6.729030 -8.89989
## 6  5.775870   8.48122  3.804050  0.434860 -0.890947  3.822750  5.00241
##        PC75      PC76     PC77      PC78      PC79      PC80     PC81      PC82
## 1  4.816550 13.458700 13.34870 12.188700 -4.819460 -11.57360  6.17078 13.476400
## 2  1.530660 -5.740000 -1.32580 -0.241753 -0.534579   1.42928 -6.28200 -4.914640
## 3 -0.446741  0.993584 -2.48911 -7.698270  5.382240   4.70027 -1.42030  0.187968
## 4  1.683970 -2.658500 -4.73852 -7.112860  2.012310   7.32083 -1.56853 -7.722160
## 5  0.992625 -3.933490 -2.05930 -0.953750 -1.942150  -4.07214  0.95736 -2.942890
## 6  0.370085 -2.142860  7.26191  1.907220 -6.636400  -1.64103  5.30904  5.049200
##       PC83      PC84      PC85     PC86     PC87      PC88      PC89      PC90
## 1  7.76193 -7.810620  17.38790 -4.99597  7.74147 -1.481580  0.176349 -0.501481
## 2 -3.09758 -2.063730  -2.48145 -3.63315  1.76304 -7.050320 -1.247400  9.195970
## 3 -2.73932  0.486628  -8.22308  0.24716 -5.48897  0.330582  1.724920 -6.792810
## 4  1.13173 -5.309550  -5.73748 -4.96979 -6.28995  1.659020 -9.198980 -5.984870
## 5 -3.89876 -0.117790 -13.50760  4.70942 -4.93501 -2.630050  4.784760  5.307010
## 6  3.58915 -2.854030   3.82292  5.45977 -4.10345  4.272530  1.657460 -3.883730
##         PC91      PC92       PC93      PC94       PC95     PC96     PC97
## 1   4.202590  4.965650 -7.4087200 -6.208640   1.590710  2.09937 -5.15330
## 2 -14.704200 -5.108240  7.1066500  1.663840   1.899690  1.74474  2.87462
## 3   4.442330 -0.533804  3.6846500 -0.154139 -12.909800  7.37524 -7.33184
## 4   0.503548  2.005400  6.5935300  4.694260  -2.200410 -5.88301 -1.10462
## 5 -11.029100  3.502840 11.1816000  4.877440  -0.128761  2.21194 15.18890
## 6  -0.361817 -3.177760  0.0502092 -0.616075  -0.968541  1.85617 -5.25075
##       PC98      PC99    PC100     PC101     PC102     PC103     PC104
## 1 -2.76715  4.668910 -6.09971 -2.169200  6.270300  3.977710 -6.981530
## 2 -3.58197 -7.732730  8.38126 -0.937371 -4.556590 -3.814630 -1.967820
## 3  3.53662 -6.219700  4.27379  4.082890  1.894790  0.424252 -4.777210
## 4  3.08432  0.489263  2.99202  1.501650  0.424027  6.592460  8.391140
## 5  4.53260  9.150640 -3.82560 -6.886630  1.589150 -0.669231  3.295550
## 6  0.41695  2.985840  1.25128 -1.136980  1.542230 -0.606253  0.763501
##         PC105     PC106     PC107      PC108     PC109     PC110     PC111
## 1 -2.15452000  4.523020   3.80938   1.531350  0.702757  4.135630 -0.273608
## 2  6.43562000 -2.728800   5.92438  -2.866840 -5.487690 -3.161320 -2.511560
## 3  5.57791000 -0.992583  -2.43924 -11.109900  4.475740  6.632170 -6.241980
## 4  3.08297000  4.307370   4.92973  -3.010530 -7.851450  4.580160  5.154650
## 5  0.00101321  2.321840 -11.48910   6.417860  5.643610 -0.598633 -5.881490
## 6 -1.09509000 -0.248610   1.51874   0.343609  1.231330 -2.490520  1.721730
##      PC112    PC113      PC114     PC115     PC116     PC117      PC118
## 1  1.94491 -2.29686  -0.308073 -0.999831  4.083370 -0.293493   0.883767
## 2 -3.36833 -6.05028   6.710480  6.181630  3.554460 -5.974590   5.684740
## 3 -4.87052 -3.96529  -5.388560  6.287600 -1.984700  4.083740  -9.429340
## 4 -1.91764 -2.23213 -11.369400 -6.917750 -8.183640  2.695530   7.313880
## 5  6.28290 -2.58825  -6.088260 -0.679319  6.029180  4.263300 -12.417700
## 6 -2.00787 -1.40786   2.166730 -1.347610 -0.300582 -1.301520   1.308540
##        PC119      PC120      PC121    PC122    PC123     PC124     PC125
## 1   4.221450  -1.564040   7.911250 -2.61741 -3.79594   7.53077  1.673740
## 2 -15.715200   1.550870  -6.573890  1.83892 10.59710 -11.08400 -3.380720
## 3 -12.139900   0.353241 -14.197300  1.51723 -8.93805  -9.65874  7.466230
## 4  -9.708120   7.559850  -6.511010  1.86800  1.61614   3.80643 -6.509970
## 5   1.590080 -14.323000   6.601470 -6.67113 12.43840  -1.01275 -1.038080
## 6  -0.409233  -3.669070  -0.784169 -1.82787  2.42521   1.32555 -0.236803
##       PC126    PC127     PC128       PC129    PC130     PC131    PC132
## 1 -0.791141 -3.44300 -3.594480   1.8163600 -2.23310  -3.75804 -3.43904
## 2 -3.964910 -3.31432  4.945450  -8.2090100 -6.61420   0.31984 -3.09043
## 3  9.292280  7.25150  6.932310   2.8002200 -3.07084  -6.12911 17.56150
## 4 -5.322430  3.51687  5.632890 -14.4926000  3.51536 -10.35130 -3.93269
## 5 -1.520330 20.49690  6.809590  -0.0800215 11.47930  -4.28857  1.40240
## 6  3.855290 -3.52590  0.411502   1.3495800 -1.18242  -1.39559 -1.16085
##        PC133     PC134       PC135     PC136     PC137      PC138    PC139
## 1  1.2963300 -4.393090   0.8236190 -2.097590 -2.814670 -1.8934500 -0.74115
## 2  8.7048000  9.048130  10.3210000 -6.284710  6.913350  4.0879700  2.96463
## 3 -6.0236500 -3.814580 -10.3348000  4.016090  1.714060  1.6771600  5.07434
## 4  2.8089000 -0.546552  -0.5071200  8.565720  5.536310 -0.8850900 -3.03628
## 5  5.9091100 -2.025550   4.4254400  9.944990 -5.560400  0.0514701  5.69492
## 6  0.0403684 -1.547740  -0.0126975 -0.575174 -0.313609  1.3002400  1.27595
##       PC140     PC141     PC142     PC143     PC144      PC145     PC146
## 1  0.297538  2.547830  -3.55896 -1.595990   1.91919  -0.110257 -0.402857
## 2  0.729823 -4.909300   4.44496 -1.743860  -5.16318   7.591420 14.467200
## 3 -1.128200 -8.056110 -11.82470 -2.224600   2.26824 -21.952000  7.497620
## 4  0.759772  0.230179  11.58190 -2.991780 -19.28690   8.705040  5.417540
## 5 -4.363940 16.202100   8.03622 -0.708072   4.29022   2.965250 -2.379100
## 6 -0.706910 -1.405860  -1.75542 -1.811070  -2.69975   2.217180 -0.493198
##        PC147      PC148    PC149     PC150    PC151      PC152     PC153
## 1  -4.608490   3.688120 -0.43062  4.909980 -3.08522   0.601014   1.46085
## 2   0.877875  -6.242550  8.06753 -1.874390  2.12882   1.423480 -18.59620
## 3  -1.860260  -5.346860 -2.48336  1.330810  2.90265 -11.719100  -5.26154
## 4 -17.799200 -16.861000  3.81050 -0.053360 -4.62369  10.370900  -6.28892
## 5   8.841900   3.743100  3.11020  4.248280  6.69694   6.445800  -2.15495
## 6  -1.695370   0.791003 -1.15501 -0.141668  2.48447  -0.237788  -1.29166
##        PC154     PC155     PC156     PC157     PC158     PC159     PC160
## 1   5.722670 -0.423216  4.201310  0.244095 -0.805949 -3.866640  5.424690
## 2  -0.625435  6.389990 14.936300  9.715080 13.279200 -8.555310  5.422110
## 3  -1.594170 10.364500 -1.123510  3.663520 11.720100  7.211250 -0.407956
## 4   2.757070 -8.743340 -6.234360 -4.000290 -2.407580 -1.000630 -4.656120
## 5 -25.056900  7.993380 -4.332630 -3.959700 -8.405300  0.794097 -7.704390
## 6   1.154230  0.163830  0.471986 -1.995310 -2.014020 -0.260112 -1.599450
##        PC161      PC162   PC163     PC164     PC165     PC166     PC167
## 1  1.6498700  -1.359840 3.79428 -3.530000 -0.853487 -0.310305 -2.290050
## 2 -8.8495800 -11.654300 9.08494 -2.520500 -0.130562  5.301890 -0.270629
## 3  4.0321500  11.996100 7.98986 12.319600  2.272150 -9.494760  6.472930
## 4  0.0514306   2.547430 5.02832  4.478120 -6.070790 -2.540390 10.593900
## 5 -1.0143100 -15.175500 3.70779  0.459311  1.854200  5.176410  9.544460
## 6 -1.7903600   0.202701 1.25694 -2.376680 -2.890320 -1.451030  1.468570
##       PC168     PC169     PC170     PC171     PC172     PC173     PC174
## 1  0.465481  2.676640 -1.601130   2.96868  -2.62084  -2.58270 -0.100016
## 2 -7.369730  1.328990 -5.188110 -15.25570  -5.76728 -16.53320 12.060300
## 3 15.124000 -9.525180 -1.132470  -5.85826  -2.58493   2.55936  9.871930
## 4 -2.576120  3.037520 21.085400  15.68240 -10.61770  13.02520 -9.889990
## 5  9.896820  4.577390  1.009540  10.29290   7.38586  -6.12128  7.591270
## 6  2.176990  0.565257  0.912449   2.95360   2.82096  -4.52211 -0.596217
##       PC175    PC176     PC177        PC178    PC179    PC180       PC181
## 1  2.871010 -1.21489  0.508154   2.07517000  1.89598 -3.65197  -3.6299200
## 2 -0.476276 -1.65314 -4.676340   2.23210000 -7.27866  1.14088 -26.2318000
## 3  1.669020  4.47738  0.889116  -0.45733200 17.97750  1.53834  -6.5801600
## 4 -4.072420 -4.76307 -5.215450   7.32140000  2.34473 -5.22767   6.1979100
## 5  2.659320  4.42937  3.850640 -10.24460000  4.89775  9.28590   0.0956869
## 6 -0.773518 -1.25914  0.271291   0.00525602 -2.64764 -2.14407  -0.3952680
##       PC182      PC183      PC184       PC185    PC186     PC187      PC188
## 1   1.24389  -3.302960   2.876120  -0.0440699 -3.30869 -1.603810  -0.268286
## 2 -18.35310   5.345250 -14.691200  -5.6640200 21.39570  6.865030 -14.165900
## 3   7.09082 -11.004800  13.481800 -20.5337000  2.79004 -4.536070   7.636240
## 4   9.04403   1.040480  13.061500  -4.2756500  2.94655 -1.229350   1.210330
## 5  -1.91249  -5.819030  -5.315890   1.6723300  4.47765 -8.252340   4.571220
## 6   1.94834   0.292433   0.871957  -0.8564560 -3.19218  0.221138   1.095620
##       PC189     PC190     PC191     PC192     PC193      PC194      PC195
## 1  1.728610  0.634863  1.204040  -1.31276  -1.80258   1.567020   2.792490
## 2  0.945586 -1.555840 12.854200  -2.89874  -1.68527   0.112624 -13.116200
## 3  2.300320 -8.008130  6.444110  -8.98300 -13.88470 -12.383800  -0.113585
## 4 12.428800 -4.788980 -4.447500  15.73080  -5.70527  -5.846530   9.306030
## 5 -5.985770  1.879450  1.476470 -11.62790  -6.21661  -3.229850   7.531100
## 6  0.714980  0.681986  0.153076  -1.70962  -1.54565   1.120300  -1.223970
##        PC196      PC197     PC198     PC199     PC200    PC201     PC202
## 1  -0.864158  -3.024560  3.592490  4.877320   1.03075  1.94791 -3.162600
## 2   7.083080 -10.919300 -0.240066  6.057200   9.91704  6.60590 -7.991950
## 3 -10.615000  -4.579790  0.957756 15.552400   1.77143 -4.19507  1.525850
## 4  23.423100  16.718100 -2.831820 -3.034520   6.46233  8.18282 -6.906330
## 5   1.602640  18.738600  4.732080 16.892400 -17.94380  7.59291  0.570049
## 6  -0.846690   0.846655 -1.244570 -0.327022   2.74285 -1.14568 -0.485728
##      PC203      PC204    PC205     PC206      PC207     PC208     PC209
## 1  1.43958  -1.518410  1.95459  5.656330  -3.165260   2.09702  0.102735
## 2  3.68832   2.636990 -3.75071 -6.810350  13.593500  -7.44244  5.649990
## 3 -6.89536  12.060000 -8.39451 -7.701610 -10.363600   2.11047 -9.061770
## 4  2.90130 -13.414600  9.13907 -0.593586  -3.380060 -17.08950 -6.544610
## 5 -5.30091   8.201080  4.50963 21.209000   3.200770  -4.36198 10.915200
## 6 -4.64714  -0.358261  1.51279  2.381280  -0.843715   1.27899  0.294638
##       PC210     PC211     PC212     PC213    PC214      PC215     PC216
## 1 -3.793070  3.757620 -1.913980  0.106880  4.64054   2.641460  1.057750
## 2  7.903730 -2.631820  0.925983  5.117520 -5.37771   2.993490 -4.576110
## 3  0.889264  0.122679  6.341800 -7.023340  5.47278 -15.066000  4.825350
## 4  1.718790  1.535190 -3.465960 -0.388398 -7.30977   5.921650 -0.304196
## 5 -2.290030  9.417030  3.900610  5.752560 -1.82579   0.352211 -3.288130
## 6 -0.507667  0.493733 -0.578154  3.186230 -2.51075   3.437370  0.422021
##      PC217     PC218     PC219     PC220     PC221     PC222     PC223
## 1 -4.23964  5.261560  0.346457  1.696060  6.609610 -0.918140 -0.652525
## 2 -1.25051 -0.712649 -9.361090  1.373260  0.941486 -0.679007 -2.396680
## 3  1.89201 -2.000300  6.197500 -8.483430  3.722360 -4.141880 -5.493690
## 4  2.09185  2.883410 -8.287690 -4.182200  7.438670  0.968637 -7.885660
## 5  3.60629  2.308410 -6.502360 -1.966790 -2.588830  3.208160  7.530030
## 6  2.21642  1.201100 -0.757425 -0.203642 -2.354220 -0.330617  0.172852
##       PC224     PC225     PC226    PC227     PC228     PC229     PC230
## 1  1.440030  0.375069  5.368990 -1.62975 -3.882370   2.55118  1.465770
## 2  1.158920 -1.649830 -3.525890 -1.56033 -0.939742   7.10125 -1.143480
## 3 -2.214400 -7.629850 -0.684586 -3.27709  4.970070  -3.72615  2.744160
## 4  1.612450 -1.170500 -4.701970 -2.02507  1.843920 -10.07330  0.651436
## 5 -2.161240 -2.060200  8.686100  5.51270 -7.836430  -3.78831 -0.415721
## 6 -0.940466 -1.301510  1.586080  1.95321 -0.276826   1.92731 -1.083310
##       PC231     PC232      PC233      PC234      PC235    PC236     PC237
## 1  0.838979 -1.215150 -7.4422700  3.0388900 -2.2598200  5.87503 -6.326390
## 2  2.612370 -2.504560 -4.6264300 -0.3901230 -0.0653116 -1.64849 -1.762290
## 3  5.116110 -1.214120 -0.0121335 -2.9612000  2.7680300 -6.01301  2.921010
## 4  2.639330 -2.288750 -2.8794700  0.0593082  3.0394700  5.45513 -2.164020
## 5 -2.224360  3.966060  1.4207800  0.0534178  7.3592100  5.60633 -1.402640
## 6 -1.261690 -0.750792 -3.0197900 -0.8436020 -0.3856480 -1.27118  0.216776
##       PC238     PC239     PC240     PC241     PC242     PC243     PC244
## 1 -2.972190 1.0442700 -0.253313  2.691210  1.332100  1.695790 -2.708640
## 2 -0.448896 1.0560500  4.273530 -1.223080  1.813660 -1.445580  0.299057
## 3 -2.602350 0.1641640  0.369107  1.247710 -2.398930  2.369080 -2.779110
## 4 -5.313580 2.2517800 -1.726960 -4.699740  0.200211  0.829226 -1.180550
## 5  1.251510 4.5528700 -1.537240 -4.584990 -0.205014 -1.850620  3.340830
## 6  2.936500 0.0154952 -1.353780  0.579329 -0.630658  1.092310 -3.475130
##       PC245     PC246     PC247     PC248     PC249     PC250     PC251
## 1  8.148420 -4.276080  6.894890 -3.070850 -0.685647 -5.314940 13.460900
## 2  2.541220  0.374377 -1.145630 -2.622020 -0.538264  0.246818  0.918522
## 3 -1.185390  0.927747  1.294350  0.220313 -1.187620  1.801910  1.660520
## 4  0.761870 -2.820680 -2.012240 -2.364380  3.156110  1.353790 -1.896550
## 5  0.379745  2.580200  2.436040  1.724550  2.283230 -2.067250  0.392675
## 6  1.345160  1.201260 -0.564827  1.243670  0.439854  1.456870 -2.140350
##        PC252     PC253     PC254      PC255      PC256       PC257     PC258
## 1  4.3786100 37.204600 -0.211792 -0.8296000  1.4709100 -13.7114000 10.404900
## 2 -0.2045570 -0.799595 -1.083990  0.0504868  0.3605240  -0.0786186 -1.371010
## 3 -0.0404249  3.268790 -1.071930  1.0412600 -0.8145450  -3.2581000  0.266288
## 4 -1.4496500 -0.126296 -1.365250  0.7934680  0.0579994   1.6465400  1.110700
## 5 -1.3385000 -1.459740  0.377419 -3.8941000  1.2156900   0.3805290  1.205490
## 6  0.9125970 -1.630110 -1.433200 -1.9005100  1.5761100   0.3080300 -0.951571
##       PC259     PC260      PC261     PC262      PC263     PC264       PC265
## 1  8.246700 10.169900 12.3661000 10.551500 -6.5520300 -8.451820 -4.58806000
## 2  0.156113  0.078879  1.2198500  2.337660 -1.1434200  1.553940 -1.84909000
## 3 -3.401480 -0.537942 -2.6529100  0.552492 -0.5463700 -1.159910  0.68678400
## 4  0.848868  1.616660 -0.0403552  0.984601 -1.4430300 -1.720050  4.14152000
## 5  0.723288 -1.525150  0.1171330 -1.025430  0.0814161  1.786080 -0.00378826
## 6  1.005760 -1.625370  0.3543250 -0.717980 -0.2991210  0.335407 -0.70056100
##       PC266       PC267       PC268     PC269      PC270     PC271      PC272
## 1  5.240320  0.84292000 -3.42696000  1.375530  0.2347510 -2.084360  0.8850150
## 2 -2.147090  0.00202997  0.21067500  1.608430 -0.9286760 -0.593916  0.6440190
## 3 -0.852395 -2.70650000 -2.68938000  1.477680 -0.1594480 -3.127140  0.2016290
## 4 -1.550920  2.73145000 -1.88184000  1.107690  0.0595403 -2.294110 -1.3184000
## 5  1.408520 -1.59335000 -0.00707087  0.581427 -0.0469112 -0.383952  0.0653132
## 6  0.791272  0.98820300 -1.80654000 -0.446946 -1.6573100  2.303220  1.4251300
##       PC273     PC274     PC275      PC276     PC277     PC278     PC279
## 1 -7.066590 -3.831980 -2.041340  1.9109100  0.235371 -1.683040  0.593002
## 2 -1.887220  0.734744  0.478119  2.5843000 -0.726244  0.561962 -1.562550
## 3  0.972553 -0.362317  0.981879  1.6486200  2.052890 -0.744881 -0.248530
## 4  2.048610 -1.325460 -0.751355 -0.0172412 -0.121998 -0.577015  0.910234
## 5  1.610640 -3.079070  0.443442 -0.8247770  0.678580 -0.104648 -1.296460
## 6 -3.027620  0.441175 -5.661570 -1.9336500  0.394137 -1.752080 -8.592760
##       PC280    PC281    PC282     PC283     PC284       PC285     PC286
## 1  3.294350 4.928870  0.22167 -1.839910  2.852200 -1.03176000  3.605580
## 2 -0.281637 0.499033  0.72375  0.540829  0.721245 -0.00990029  0.104565
## 3  0.812689 0.743964 -1.19117 -0.213697 -0.483257 -0.06532060 -0.756868
## 4  0.643373 1.707770 -1.24763  1.911780  0.774342  0.12405300  0.150799
## 5  1.410500 0.355993  0.48977 -0.415595 -1.320460 -0.58338300  0.178719
## 6 -3.012900 5.596500  4.84887 -6.007840  2.409990  2.69619000  9.282960
##       PC287     PC288     PC289     PC290       PC291      PC292     PC293
## 1  0.750799  2.481190 -3.905700 -0.936461   0.5486000  2.6105100 -0.303720
## 2  0.214913 -1.469200 -0.289237  0.530904   0.9807980 -1.8608200 -0.260329
## 3  1.258940 -0.459047  0.236495  0.219122   0.0451856 -0.2709470 -0.144081
## 4 -0.602306  2.346140  0.246626 -0.666626  -1.0121300 -0.0633217 -0.455679
## 5  0.396745  0.575871 -0.809585 -0.373673   0.1655020 -0.5395460  0.786351
## 6  3.104320 -2.620390 -8.282950 32.762800 -26.0616000  5.9327800  5.437010
##       PC294     PC295      PC296     PC297      PC298      PC299     PC300
## 1 -0.798181  0.478913 -1.2460100  0.796365  0.0845726 -1.4198000  0.172801
## 2 -0.493476  0.900055 -0.1344480  0.397938  1.0070100  0.9602260 -0.230228
## 3 -0.829300 -0.121166  1.1496600 -0.958856  0.8517270 -0.5905860 -0.110834
## 4 -0.840823 -0.674758 -0.0459401  0.461341  0.4481720 -0.0601338  0.250727
## 5  0.448526 -0.321390 -0.3594780 -0.674036 -0.0334604  0.7397040  0.590811
## 6 -5.968770  2.589400 -1.6682400  3.948190 -3.2865500  0.6164350  1.712460
##         PC301     PC302     PC303     PC304      PC305     PC306     PC307
## 1 -0.02857240  0.784914 -0.111594  0.299842  0.1537890  0.257618  0.581506
## 2  0.00664791 -0.299704 -0.365714  0.161070  0.0510703  0.662804  1.036940
## 3  0.48116000  0.622439 -0.207147  0.375918  0.3409570 -0.491765  0.212827
## 4 -0.51127000  0.220587  0.334186 -0.752543 -0.0686399 -0.247551  0.710792
## 5 -0.08005430  0.672211  0.203599  0.177536  0.6002280 -0.438392 -0.123792
## 6 -1.55321000 -2.020450 -1.418340 -0.912601  0.8920440  0.499495  0.719343
##       PC308     PC309     PC310     PC311     PC312     PC313       PC314
## 1 -0.912519  0.131409  0.297420  0.590261 -1.487690 -0.358880  0.11508000
## 2 -0.191389 -0.217103  0.644524  0.341326 -0.309193 -0.240516  0.12306700
## 3  0.656745  0.814825 -0.873942  0.326364 -0.447306  0.226318  0.10180300
## 4 -0.274965  0.646337 -0.385686  0.343867  0.224897  0.442280 -1.00193000
## 5  0.165050 -1.296820  0.112506 -0.108202  0.387461 -0.358981 -0.00209422
## 6 -1.782870  1.209840 -2.716690 -0.935250  0.423990  0.991128 -1.04000000
##        PC315      PC316     PC317      PC318      PC319      PC320     PC321
## 1 -0.1361810  0.3707860  0.516543 -0.2106360  0.0629784 -0.2354070 0.0745866
## 2  0.2838960  0.0905737  0.497427  0.0790915 -0.1036950  0.0608285 0.0557892
## 3  0.2841800  0.4468840  0.175940 -0.2585500 -0.4107850  0.0991483 0.1526120
## 4  0.4180580 -0.3414670  0.120261  0.0687079 -0.3069000  0.1089410 0.2863580
## 5  0.1452500 -0.5249250  0.201887  0.2170580  0.4764040 -0.0473087 0.2351270
## 6  0.0775644  0.1263700 -0.248673 -0.3149170  0.7343540  0.4174750 0.6310480
##         PC322      PC323      PC324      PC325      PC326       PC327
## 1 -0.26463200 -0.0522274  0.0492131 -0.4739130 -0.1011950 -0.02837090
## 2 -0.14297300  0.0105028  0.2550780  0.2915880 -0.1465930 -0.06746470
## 3 -0.27335000  0.1458950  0.6288990  0.1844770  0.0152865  0.10312400
## 4 -0.00865434  0.4698300  0.3435860 -0.1014130  0.1906390  0.00470319
## 5  0.19431100  0.0761079  0.0427247  0.0456039  0.3292730  0.01840060
## 6 -0.04306320 -0.1040430 -0.3736180  0.2089580 -0.2925900 -0.38677800
##         PC328      PC329        PC330 Individual  Pop_City Location Latitude
## 1 -0.06080890 -0.2694990 -1.75322e-06        255 Bengaluru    India  12.9716
## 2  0.09760780 -0.2002560 -1.75322e-06        261 Bengaluru    India  12.9716
## 3 -0.00548772 -0.0353852 -1.75322e-06        256 Bengaluru    India  12.9716
## 4  0.02166250  0.0259966 -1.75322e-06        258 Bengaluru    India  12.9716
## 5 -0.20685700 -0.0353230 -1.75322e-06        265 Bengaluru    India  12.9716
## 6 -0.00352486  0.0335714 -1.75322e-06        262 Bengaluru    India  12.9716
##   Longitude Continent    Year     Region Subregion order order2 orderold
## 1   77.5946      Asia Unknown South Asia              60     52       52
## 2   77.5946      Asia Unknown South Asia              60     52       52
## 3   77.5946      Asia Unknown South Asia              60     52       52
## 4   77.5946      Asia Unknown South Asia              60     52       52
## 5   77.5946      Asia Unknown South Asia              60     52       52
## 6   77.5946      Asia Unknown South Asia              60     52       52

6.1.2 Create PCA plots for Italy + Native

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1","PCA_lea_native_italy_all_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 and PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "PCA_lea_native_italy_all_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

Repeat, but separate out 1990s Italian pops into own region

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global","lea", "MAF_1", "sampling_loc_italy_native_temporal.rds"
    ))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native_temporal.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native_temporal.rds"))
head(sampling_loc)
##    Pop_City  Location Latitude Longitude Continent Abbreviation Year
## 1   Brescia   Brescia 45.53373 10.204450    Europe          BRE 1995
## 2    Cesena    Cesena 44.15287 12.244265    Europe          CES 1995
## 3 Desenzano Desenzano 45.46289 10.549140    Europe          DES 1995
## 4   Bologna   Bologna 44.48478 11.366584    Europe          ITB 2017
## 5   Imperia   Imperia 43.87159  8.003559    Europe          IMP 2017
## 6    Puglia    Puglia 41.12213 16.844107    Europe          ITP 2016
##           Region   Subregion order order2 orderold
## 1   Italy (1995) West Europe    20     12       12
## 2   Italy (1995) West Europe    24     16       16
## 3   Italy (1995) West Europe    21     13       13
## 4 Italy (modern) West Europe    23     15       15
## 5 Italy (modern) West Europe    18     10       10
## 6 Italy (modern) West Europe    28     20       20

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 38 Levels: BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 38

Check the regions

unique(sampling_loc$Region)
## [1] "Italy (1995)"   "Italy (modern)" "East Asia"      "South Asia"    
## [5] "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population      PC1      PC2      PC3      PC4       PC5         PC6     PC7
## 1        BEN -36.4286 -8.65616  9.67835 1.706060  0.242176  0.00620311 3.35510
## 2        BEN -35.6578 -7.99592  9.14433 3.316050 -2.987790 -0.63408700 4.14891
## 3        BEN -37.2921 -8.22387  8.54982 1.835040 -1.936330 -0.42146200 3.54288
## 4        BEN -35.6589 -8.20466  9.41346 2.407870 -1.585510  0.07330430 3.14577
## 5        BEN -36.3292 -8.46492 10.47310 0.864815  0.747834 -1.23031000 3.35605
## 6        BEN -35.8840 -8.60298 10.39800 1.301580 -2.034310 -0.08303760 2.72750
##        PC8      PC9     PC10     PC11     PC12     PC13    PC14       PC15
## 1 -3.01092 -8.93709 1.257590 -4.37216  9.57095 1.807150 3.99259 -2.8512300
## 2 -4.05844 -6.56452 3.101820 -6.56300  9.34943 1.495770 1.91960 -0.0789511
## 3 -2.41577 -8.58320 1.848370 -6.13380 10.95160 2.024820 3.09653 -0.2232520
## 4 -2.41373 -7.71873 3.074200 -3.69073  9.34427 2.835000 2.65460 -1.6481300
## 5 -3.19459 -7.71933 3.112940 -3.06045 11.42760 0.341763 4.35146 -1.2772800
## 6 -3.45130 -9.82680 0.593669 -5.16226 10.53380 1.694970 3.81288 -2.9277000
##         PC16      PC17      PC18       PC19      PC20      PC21      PC22
## 1  1.4269200  0.444874 -2.605780 -0.1387210 -0.548535  0.446162  0.679672
## 2  2.6657600 -0.459167 -0.675325  1.7449300 -0.219636 -0.583923  0.478380
## 3 -1.0003300  1.323620 -0.496820  0.4805230 -1.521700 -0.898755  1.032990
## 4 -0.0257458 -0.540294 -1.702020 -0.2572390 -1.113980 -0.932667 -1.856180
## 5  2.2095900  0.186720  0.709656 -1.4002400 -2.768490  2.423650  0.924281
## 6  1.5628700  0.690478 -1.002700  0.0920083 -3.523690 -3.039580  5.411600
##       PC23      PC24     PC25     PC26       PC27     PC28    PC29     PC30
## 1  1.20414 -3.448410  8.67336 10.20080  3.5956500 -2.87586 1.13300 5.674920
## 2  1.63656 -1.828880  6.22152  6.88458  0.9441300 -5.16572 3.27890 4.441010
## 3 -1.27430 -2.677440  4.36255  5.60435  1.0782800 -7.61513 1.98766 1.732170
## 4 -1.35891 -2.573890  4.74408  7.11259 -0.0799014 -2.26633 2.12922 2.664020
## 5  0.92488  0.106894  5.41177  7.92296  1.1685500 -1.75458 2.26918 0.815352
## 6 -3.99058 -6.468870 10.78720 11.60920  1.0326100 -8.80816 6.29450 6.784490
##          PC31      PC32      PC33      PC34     PC35     PC36     PC37
## 1 -1.35344000  0.507352  -8.41585  0.829387 12.26600 11.85820  4.74150
## 2 -0.00149523  0.226129  -9.49899  1.133870  7.95940  2.20301  1.24861
## 3 -3.30131000 -0.940338  -5.27423  0.421175  4.99281  2.88495  5.57283
## 4 -0.17599900  1.379430  -5.63710  2.246720  3.13143  6.34839  4.92737
## 5 -0.13300000  0.541537  -2.29314  0.668216  6.68419  5.72174  6.01297
## 6 -5.08747000 -0.171277 -23.00240 -3.675780 16.02970 12.80410 16.76260
##        PC38     PC39      PC40     PC41     PC42        PC43      PC44
## 1 -9.306760  2.76689  -5.93442 -6.06762  6.25710 -3.59349000 -5.234970
## 2  0.942282  1.15206  -1.70704 -3.64219  4.40032 -4.88358000 -0.983047
## 3 -2.730500  1.48026  -2.75150 -3.23859  7.02737 -0.00871823  1.792620
## 4 -2.748640  4.86364  -3.10931 -3.63340  4.08623 -0.34372400  1.801030
## 5 -2.573450 -2.40112  -1.12456 -5.43754  4.60885 -2.15457000 -0.110867
## 6 -1.843590  3.79675 -16.48130 -4.03469 18.38850  3.29094000 12.381400
##        PC45      PC46     PC47      PC48     PC49     PC50      PC51     PC52
## 1  4.988650 -4.982460 -1.85080 -1.710760  3.93953 -1.71044 -0.485108 -5.07246
## 2  3.155360  1.187990  1.96649 -2.717490  3.74892 -3.07740  3.763990  1.96752
## 3  4.232960 -0.399819  2.01636 -3.436120 -2.23064  4.40633  3.653800 -1.67868
## 4  4.671640  1.507510  2.25192 -1.914680  1.59632  3.10852  1.710710 -2.80248
## 5  0.368042 -3.879630  1.59489  0.537028  3.50036 -1.82312  3.326420 -1.81966
## 6 14.391200  9.710800  1.61274 -9.103410  2.49952 -5.82170  3.909670 -6.41516
##        PC53      PC54       PC55     PC56      PC57       PC58     PC59
## 1 -3.498460 -0.591602 -3.4994500 -4.82154 -1.810640 -17.726900 -9.26768
## 2 -0.433642 -1.930470 -0.6457280 -2.88981  0.837444  -4.499700 -2.36933
## 3  1.006650  1.890690 -2.8024000  1.05884  1.743710  -3.537010 -3.22576
## 4 -1.712400  0.347678 -2.8923900 -1.44836  9.173920  -3.389600 -2.25018
## 5  1.983130 -1.238630  0.0951803 -2.85376 -0.779215  -0.808632  2.27708
## 6 -5.677470  5.034360  5.4366300  3.93041  3.065970  18.928700  7.49059
##        PC60     PC61      PC62      PC63      PC64     PC65      PC66     PC67
## 1 -20.61790 12.84320  14.67310 -3.457630 -10.14490 -1.50620  0.232364  4.12503
## 2  -7.87076  5.24957   7.11435 -2.593260  -3.41792  1.14602  1.396230 -2.10571
## 3  -5.54479  3.61423   7.51875  4.003790  -1.75662  3.00994  5.822280 -4.10763
## 4  -8.13063 -0.47465   5.59817 -1.154140  -7.53385  1.28329  3.698950 -3.89698
## 5  -6.65000  7.30773  11.04180 -1.247250  -1.33184 -3.02103 -1.397850 -3.99500
## 6  35.27880 -8.97710 -21.74100 -0.392993  16.62620 -6.04496 -1.847560  4.15223
##        PC68      PC69      PC70      PC71      PC72      PC73     PC74
## 1  1.536300 -13.48370  3.477350  3.720790 -5.916960 -1.536060 21.41410
## 2  0.410133   4.71199 -3.481590  4.972220 -1.976500  1.742720 -4.75006
## 3  4.377580   1.44084  0.462429 -4.110600  3.893530  0.941228 -7.14890
## 4 -3.201360  -2.81169  1.883300 -4.296660  8.613790 -4.352930 -8.34767
## 5 -8.322840   3.09596 -4.858230  0.499918 -6.630700 -6.729030 -8.89989
## 6  5.775870   8.48122  3.804050  0.434860 -0.890947  3.822750  5.00241
##        PC75      PC76     PC77      PC78      PC79      PC80     PC81      PC82
## 1  4.816550 13.458700 13.34870 12.188700 -4.819460 -11.57360  6.17078 13.476400
## 2  1.530660 -5.740000 -1.32580 -0.241753 -0.534579   1.42928 -6.28200 -4.914640
## 3 -0.446741  0.993584 -2.48911 -7.698270  5.382240   4.70027 -1.42030  0.187968
## 4  1.683970 -2.658500 -4.73852 -7.112860  2.012310   7.32083 -1.56853 -7.722160
## 5  0.992625 -3.933490 -2.05930 -0.953750 -1.942150  -4.07214  0.95736 -2.942890
## 6  0.370085 -2.142860  7.26191  1.907220 -6.636400  -1.64103  5.30904  5.049200
##       PC83      PC84      PC85     PC86     PC87      PC88      PC89      PC90
## 1  7.76193 -7.810620  17.38790 -4.99597  7.74147 -1.481580  0.176349 -0.501481
## 2 -3.09758 -2.063730  -2.48145 -3.63315  1.76304 -7.050320 -1.247400  9.195970
## 3 -2.73932  0.486628  -8.22308  0.24716 -5.48897  0.330582  1.724920 -6.792810
## 4  1.13173 -5.309550  -5.73748 -4.96979 -6.28995  1.659020 -9.198980 -5.984870
## 5 -3.89876 -0.117790 -13.50760  4.70942 -4.93501 -2.630050  4.784760  5.307010
## 6  3.58915 -2.854030   3.82292  5.45977 -4.10345  4.272530  1.657460 -3.883730
##         PC91      PC92       PC93      PC94       PC95     PC96     PC97
## 1   4.202590  4.965650 -7.4087200 -6.208640   1.590710  2.09937 -5.15330
## 2 -14.704200 -5.108240  7.1066500  1.663840   1.899690  1.74474  2.87462
## 3   4.442330 -0.533804  3.6846500 -0.154139 -12.909800  7.37524 -7.33184
## 4   0.503548  2.005400  6.5935300  4.694260  -2.200410 -5.88301 -1.10462
## 5 -11.029100  3.502840 11.1816000  4.877440  -0.128761  2.21194 15.18890
## 6  -0.361817 -3.177760  0.0502092 -0.616075  -0.968541  1.85617 -5.25075
##       PC98      PC99    PC100     PC101     PC102     PC103     PC104
## 1 -2.76715  4.668910 -6.09971 -2.169200  6.270300  3.977710 -6.981530
## 2 -3.58197 -7.732730  8.38126 -0.937371 -4.556590 -3.814630 -1.967820
## 3  3.53662 -6.219700  4.27379  4.082890  1.894790  0.424252 -4.777210
## 4  3.08432  0.489263  2.99202  1.501650  0.424027  6.592460  8.391140
## 5  4.53260  9.150640 -3.82560 -6.886630  1.589150 -0.669231  3.295550
## 6  0.41695  2.985840  1.25128 -1.136980  1.542230 -0.606253  0.763501
##         PC105     PC106     PC107      PC108     PC109     PC110     PC111
## 1 -2.15452000  4.523020   3.80938   1.531350  0.702757  4.135630 -0.273608
## 2  6.43562000 -2.728800   5.92438  -2.866840 -5.487690 -3.161320 -2.511560
## 3  5.57791000 -0.992583  -2.43924 -11.109900  4.475740  6.632170 -6.241980
## 4  3.08297000  4.307370   4.92973  -3.010530 -7.851450  4.580160  5.154650
## 5  0.00101321  2.321840 -11.48910   6.417860  5.643610 -0.598633 -5.881490
## 6 -1.09509000 -0.248610   1.51874   0.343609  1.231330 -2.490520  1.721730
##      PC112    PC113      PC114     PC115     PC116     PC117      PC118
## 1  1.94491 -2.29686  -0.308073 -0.999831  4.083370 -0.293493   0.883767
## 2 -3.36833 -6.05028   6.710480  6.181630  3.554460 -5.974590   5.684740
## 3 -4.87052 -3.96529  -5.388560  6.287600 -1.984700  4.083740  -9.429340
## 4 -1.91764 -2.23213 -11.369400 -6.917750 -8.183640  2.695530   7.313880
## 5  6.28290 -2.58825  -6.088260 -0.679319  6.029180  4.263300 -12.417700
## 6 -2.00787 -1.40786   2.166730 -1.347610 -0.300582 -1.301520   1.308540
##        PC119      PC120      PC121    PC122    PC123     PC124     PC125
## 1   4.221450  -1.564040   7.911250 -2.61741 -3.79594   7.53077  1.673740
## 2 -15.715200   1.550870  -6.573890  1.83892 10.59710 -11.08400 -3.380720
## 3 -12.139900   0.353241 -14.197300  1.51723 -8.93805  -9.65874  7.466230
## 4  -9.708120   7.559850  -6.511010  1.86800  1.61614   3.80643 -6.509970
## 5   1.590080 -14.323000   6.601470 -6.67113 12.43840  -1.01275 -1.038080
## 6  -0.409233  -3.669070  -0.784169 -1.82787  2.42521   1.32555 -0.236803
##       PC126    PC127     PC128       PC129    PC130     PC131    PC132
## 1 -0.791141 -3.44300 -3.594480   1.8163600 -2.23310  -3.75804 -3.43904
## 2 -3.964910 -3.31432  4.945450  -8.2090100 -6.61420   0.31984 -3.09043
## 3  9.292280  7.25150  6.932310   2.8002200 -3.07084  -6.12911 17.56150
## 4 -5.322430  3.51687  5.632890 -14.4926000  3.51536 -10.35130 -3.93269
## 5 -1.520330 20.49690  6.809590  -0.0800215 11.47930  -4.28857  1.40240
## 6  3.855290 -3.52590  0.411502   1.3495800 -1.18242  -1.39559 -1.16085
##        PC133     PC134       PC135     PC136     PC137      PC138    PC139
## 1  1.2963300 -4.393090   0.8236190 -2.097590 -2.814670 -1.8934500 -0.74115
## 2  8.7048000  9.048130  10.3210000 -6.284710  6.913350  4.0879700  2.96463
## 3 -6.0236500 -3.814580 -10.3348000  4.016090  1.714060  1.6771600  5.07434
## 4  2.8089000 -0.546552  -0.5071200  8.565720  5.536310 -0.8850900 -3.03628
## 5  5.9091100 -2.025550   4.4254400  9.944990 -5.560400  0.0514701  5.69492
## 6  0.0403684 -1.547740  -0.0126975 -0.575174 -0.313609  1.3002400  1.27595
##       PC140     PC141     PC142     PC143     PC144      PC145     PC146
## 1  0.297538  2.547830  -3.55896 -1.595990   1.91919  -0.110257 -0.402857
## 2  0.729823 -4.909300   4.44496 -1.743860  -5.16318   7.591420 14.467200
## 3 -1.128200 -8.056110 -11.82470 -2.224600   2.26824 -21.952000  7.497620
## 4  0.759772  0.230179  11.58190 -2.991780 -19.28690   8.705040  5.417540
## 5 -4.363940 16.202100   8.03622 -0.708072   4.29022   2.965250 -2.379100
## 6 -0.706910 -1.405860  -1.75542 -1.811070  -2.69975   2.217180 -0.493198
##        PC147      PC148    PC149     PC150    PC151      PC152     PC153
## 1  -4.608490   3.688120 -0.43062  4.909980 -3.08522   0.601014   1.46085
## 2   0.877875  -6.242550  8.06753 -1.874390  2.12882   1.423480 -18.59620
## 3  -1.860260  -5.346860 -2.48336  1.330810  2.90265 -11.719100  -5.26154
## 4 -17.799200 -16.861000  3.81050 -0.053360 -4.62369  10.370900  -6.28892
## 5   8.841900   3.743100  3.11020  4.248280  6.69694   6.445800  -2.15495
## 6  -1.695370   0.791003 -1.15501 -0.141668  2.48447  -0.237788  -1.29166
##        PC154     PC155     PC156     PC157     PC158     PC159     PC160
## 1   5.722670 -0.423216  4.201310  0.244095 -0.805949 -3.866640  5.424690
## 2  -0.625435  6.389990 14.936300  9.715080 13.279200 -8.555310  5.422110
## 3  -1.594170 10.364500 -1.123510  3.663520 11.720100  7.211250 -0.407956
## 4   2.757070 -8.743340 -6.234360 -4.000290 -2.407580 -1.000630 -4.656120
## 5 -25.056900  7.993380 -4.332630 -3.959700 -8.405300  0.794097 -7.704390
## 6   1.154230  0.163830  0.471986 -1.995310 -2.014020 -0.260112 -1.599450
##        PC161      PC162   PC163     PC164     PC165     PC166     PC167
## 1  1.6498700  -1.359840 3.79428 -3.530000 -0.853487 -0.310305 -2.290050
## 2 -8.8495800 -11.654300 9.08494 -2.520500 -0.130562  5.301890 -0.270629
## 3  4.0321500  11.996100 7.98986 12.319600  2.272150 -9.494760  6.472930
## 4  0.0514306   2.547430 5.02832  4.478120 -6.070790 -2.540390 10.593900
## 5 -1.0143100 -15.175500 3.70779  0.459311  1.854200  5.176410  9.544460
## 6 -1.7903600   0.202701 1.25694 -2.376680 -2.890320 -1.451030  1.468570
##       PC168     PC169     PC170     PC171     PC172     PC173     PC174
## 1  0.465481  2.676640 -1.601130   2.96868  -2.62084  -2.58270 -0.100016
## 2 -7.369730  1.328990 -5.188110 -15.25570  -5.76728 -16.53320 12.060300
## 3 15.124000 -9.525180 -1.132470  -5.85826  -2.58493   2.55936  9.871930
## 4 -2.576120  3.037520 21.085400  15.68240 -10.61770  13.02520 -9.889990
## 5  9.896820  4.577390  1.009540  10.29290   7.38586  -6.12128  7.591270
## 6  2.176990  0.565257  0.912449   2.95360   2.82096  -4.52211 -0.596217
##       PC175    PC176     PC177        PC178    PC179    PC180       PC181
## 1  2.871010 -1.21489  0.508154   2.07517000  1.89598 -3.65197  -3.6299200
## 2 -0.476276 -1.65314 -4.676340   2.23210000 -7.27866  1.14088 -26.2318000
## 3  1.669020  4.47738  0.889116  -0.45733200 17.97750  1.53834  -6.5801600
## 4 -4.072420 -4.76307 -5.215450   7.32140000  2.34473 -5.22767   6.1979100
## 5  2.659320  4.42937  3.850640 -10.24460000  4.89775  9.28590   0.0956869
## 6 -0.773518 -1.25914  0.271291   0.00525602 -2.64764 -2.14407  -0.3952680
##       PC182      PC183      PC184       PC185    PC186     PC187      PC188
## 1   1.24389  -3.302960   2.876120  -0.0440699 -3.30869 -1.603810  -0.268286
## 2 -18.35310   5.345250 -14.691200  -5.6640200 21.39570  6.865030 -14.165900
## 3   7.09082 -11.004800  13.481800 -20.5337000  2.79004 -4.536070   7.636240
## 4   9.04403   1.040480  13.061500  -4.2756500  2.94655 -1.229350   1.210330
## 5  -1.91249  -5.819030  -5.315890   1.6723300  4.47765 -8.252340   4.571220
## 6   1.94834   0.292433   0.871957  -0.8564560 -3.19218  0.221138   1.095620
##       PC189     PC190     PC191     PC192     PC193      PC194      PC195
## 1  1.728610  0.634863  1.204040  -1.31276  -1.80258   1.567020   2.792490
## 2  0.945586 -1.555840 12.854200  -2.89874  -1.68527   0.112624 -13.116200
## 3  2.300320 -8.008130  6.444110  -8.98300 -13.88470 -12.383800  -0.113585
## 4 12.428800 -4.788980 -4.447500  15.73080  -5.70527  -5.846530   9.306030
## 5 -5.985770  1.879450  1.476470 -11.62790  -6.21661  -3.229850   7.531100
## 6  0.714980  0.681986  0.153076  -1.70962  -1.54565   1.120300  -1.223970
##        PC196      PC197     PC198     PC199     PC200    PC201     PC202
## 1  -0.864158  -3.024560  3.592490  4.877320   1.03075  1.94791 -3.162600
## 2   7.083080 -10.919300 -0.240066  6.057200   9.91704  6.60590 -7.991950
## 3 -10.615000  -4.579790  0.957756 15.552400   1.77143 -4.19507  1.525850
## 4  23.423100  16.718100 -2.831820 -3.034520   6.46233  8.18282 -6.906330
## 5   1.602640  18.738600  4.732080 16.892400 -17.94380  7.59291  0.570049
## 6  -0.846690   0.846655 -1.244570 -0.327022   2.74285 -1.14568 -0.485728
##      PC203      PC204    PC205     PC206      PC207     PC208     PC209
## 1  1.43958  -1.518410  1.95459  5.656330  -3.165260   2.09702  0.102735
## 2  3.68832   2.636990 -3.75071 -6.810350  13.593500  -7.44244  5.649990
## 3 -6.89536  12.060000 -8.39451 -7.701610 -10.363600   2.11047 -9.061770
## 4  2.90130 -13.414600  9.13907 -0.593586  -3.380060 -17.08950 -6.544610
## 5 -5.30091   8.201080  4.50963 21.209000   3.200770  -4.36198 10.915200
## 6 -4.64714  -0.358261  1.51279  2.381280  -0.843715   1.27899  0.294638
##       PC210     PC211     PC212     PC213    PC214      PC215     PC216
## 1 -3.793070  3.757620 -1.913980  0.106880  4.64054   2.641460  1.057750
## 2  7.903730 -2.631820  0.925983  5.117520 -5.37771   2.993490 -4.576110
## 3  0.889264  0.122679  6.341800 -7.023340  5.47278 -15.066000  4.825350
## 4  1.718790  1.535190 -3.465960 -0.388398 -7.30977   5.921650 -0.304196
## 5 -2.290030  9.417030  3.900610  5.752560 -1.82579   0.352211 -3.288130
## 6 -0.507667  0.493733 -0.578154  3.186230 -2.51075   3.437370  0.422021
##      PC217     PC218     PC219     PC220     PC221     PC222     PC223
## 1 -4.23964  5.261560  0.346457  1.696060  6.609610 -0.918140 -0.652525
## 2 -1.25051 -0.712649 -9.361090  1.373260  0.941486 -0.679007 -2.396680
## 3  1.89201 -2.000300  6.197500 -8.483430  3.722360 -4.141880 -5.493690
## 4  2.09185  2.883410 -8.287690 -4.182200  7.438670  0.968637 -7.885660
## 5  3.60629  2.308410 -6.502360 -1.966790 -2.588830  3.208160  7.530030
## 6  2.21642  1.201100 -0.757425 -0.203642 -2.354220 -0.330617  0.172852
##       PC224     PC225     PC226    PC227     PC228     PC229     PC230
## 1  1.440030  0.375069  5.368990 -1.62975 -3.882370   2.55118  1.465770
## 2  1.158920 -1.649830 -3.525890 -1.56033 -0.939742   7.10125 -1.143480
## 3 -2.214400 -7.629850 -0.684586 -3.27709  4.970070  -3.72615  2.744160
## 4  1.612450 -1.170500 -4.701970 -2.02507  1.843920 -10.07330  0.651436
## 5 -2.161240 -2.060200  8.686100  5.51270 -7.836430  -3.78831 -0.415721
## 6 -0.940466 -1.301510  1.586080  1.95321 -0.276826   1.92731 -1.083310
##       PC231     PC232      PC233      PC234      PC235    PC236     PC237
## 1  0.838979 -1.215150 -7.4422700  3.0388900 -2.2598200  5.87503 -6.326390
## 2  2.612370 -2.504560 -4.6264300 -0.3901230 -0.0653116 -1.64849 -1.762290
## 3  5.116110 -1.214120 -0.0121335 -2.9612000  2.7680300 -6.01301  2.921010
## 4  2.639330 -2.288750 -2.8794700  0.0593082  3.0394700  5.45513 -2.164020
## 5 -2.224360  3.966060  1.4207800  0.0534178  7.3592100  5.60633 -1.402640
## 6 -1.261690 -0.750792 -3.0197900 -0.8436020 -0.3856480 -1.27118  0.216776
##       PC238     PC239     PC240     PC241     PC242     PC243     PC244
## 1 -2.972190 1.0442700 -0.253313  2.691210  1.332100  1.695790 -2.708640
## 2 -0.448896 1.0560500  4.273530 -1.223080  1.813660 -1.445580  0.299057
## 3 -2.602350 0.1641640  0.369107  1.247710 -2.398930  2.369080 -2.779110
## 4 -5.313580 2.2517800 -1.726960 -4.699740  0.200211  0.829226 -1.180550
## 5  1.251510 4.5528700 -1.537240 -4.584990 -0.205014 -1.850620  3.340830
## 6  2.936500 0.0154952 -1.353780  0.579329 -0.630658  1.092310 -3.475130
##       PC245     PC246     PC247     PC248     PC249     PC250     PC251
## 1  8.148420 -4.276080  6.894890 -3.070850 -0.685647 -5.314940 13.460900
## 2  2.541220  0.374377 -1.145630 -2.622020 -0.538264  0.246818  0.918522
## 3 -1.185390  0.927747  1.294350  0.220313 -1.187620  1.801910  1.660520
## 4  0.761870 -2.820680 -2.012240 -2.364380  3.156110  1.353790 -1.896550
## 5  0.379745  2.580200  2.436040  1.724550  2.283230 -2.067250  0.392675
## 6  1.345160  1.201260 -0.564827  1.243670  0.439854  1.456870 -2.140350
##        PC252     PC253     PC254      PC255      PC256       PC257     PC258
## 1  4.3786100 37.204600 -0.211792 -0.8296000  1.4709100 -13.7114000 10.404900
## 2 -0.2045570 -0.799595 -1.083990  0.0504868  0.3605240  -0.0786186 -1.371010
## 3 -0.0404249  3.268790 -1.071930  1.0412600 -0.8145450  -3.2581000  0.266288
## 4 -1.4496500 -0.126296 -1.365250  0.7934680  0.0579994   1.6465400  1.110700
## 5 -1.3385000 -1.459740  0.377419 -3.8941000  1.2156900   0.3805290  1.205490
## 6  0.9125970 -1.630110 -1.433200 -1.9005100  1.5761100   0.3080300 -0.951571
##       PC259     PC260      PC261     PC262      PC263     PC264       PC265
## 1  8.246700 10.169900 12.3661000 10.551500 -6.5520300 -8.451820 -4.58806000
## 2  0.156113  0.078879  1.2198500  2.337660 -1.1434200  1.553940 -1.84909000
## 3 -3.401480 -0.537942 -2.6529100  0.552492 -0.5463700 -1.159910  0.68678400
## 4  0.848868  1.616660 -0.0403552  0.984601 -1.4430300 -1.720050  4.14152000
## 5  0.723288 -1.525150  0.1171330 -1.025430  0.0814161  1.786080 -0.00378826
## 6  1.005760 -1.625370  0.3543250 -0.717980 -0.2991210  0.335407 -0.70056100
##       PC266       PC267       PC268     PC269      PC270     PC271      PC272
## 1  5.240320  0.84292000 -3.42696000  1.375530  0.2347510 -2.084360  0.8850150
## 2 -2.147090  0.00202997  0.21067500  1.608430 -0.9286760 -0.593916  0.6440190
## 3 -0.852395 -2.70650000 -2.68938000  1.477680 -0.1594480 -3.127140  0.2016290
## 4 -1.550920  2.73145000 -1.88184000  1.107690  0.0595403 -2.294110 -1.3184000
## 5  1.408520 -1.59335000 -0.00707087  0.581427 -0.0469112 -0.383952  0.0653132
## 6  0.791272  0.98820300 -1.80654000 -0.446946 -1.6573100  2.303220  1.4251300
##       PC273     PC274     PC275      PC276     PC277     PC278     PC279
## 1 -7.066590 -3.831980 -2.041340  1.9109100  0.235371 -1.683040  0.593002
## 2 -1.887220  0.734744  0.478119  2.5843000 -0.726244  0.561962 -1.562550
## 3  0.972553 -0.362317  0.981879  1.6486200  2.052890 -0.744881 -0.248530
## 4  2.048610 -1.325460 -0.751355 -0.0172412 -0.121998 -0.577015  0.910234
## 5  1.610640 -3.079070  0.443442 -0.8247770  0.678580 -0.104648 -1.296460
## 6 -3.027620  0.441175 -5.661570 -1.9336500  0.394137 -1.752080 -8.592760
##       PC280    PC281    PC282     PC283     PC284       PC285     PC286
## 1  3.294350 4.928870  0.22167 -1.839910  2.852200 -1.03176000  3.605580
## 2 -0.281637 0.499033  0.72375  0.540829  0.721245 -0.00990029  0.104565
## 3  0.812689 0.743964 -1.19117 -0.213697 -0.483257 -0.06532060 -0.756868
## 4  0.643373 1.707770 -1.24763  1.911780  0.774342  0.12405300  0.150799
## 5  1.410500 0.355993  0.48977 -0.415595 -1.320460 -0.58338300  0.178719
## 6 -3.012900 5.596500  4.84887 -6.007840  2.409990  2.69619000  9.282960
##       PC287     PC288     PC289     PC290       PC291      PC292     PC293
## 1  0.750799  2.481190 -3.905700 -0.936461   0.5486000  2.6105100 -0.303720
## 2  0.214913 -1.469200 -0.289237  0.530904   0.9807980 -1.8608200 -0.260329
## 3  1.258940 -0.459047  0.236495  0.219122   0.0451856 -0.2709470 -0.144081
## 4 -0.602306  2.346140  0.246626 -0.666626  -1.0121300 -0.0633217 -0.455679
## 5  0.396745  0.575871 -0.809585 -0.373673   0.1655020 -0.5395460  0.786351
## 6  3.104320 -2.620390 -8.282950 32.762800 -26.0616000  5.9327800  5.437010
##       PC294     PC295      PC296     PC297      PC298      PC299     PC300
## 1 -0.798181  0.478913 -1.2460100  0.796365  0.0845726 -1.4198000  0.172801
## 2 -0.493476  0.900055 -0.1344480  0.397938  1.0070100  0.9602260 -0.230228
## 3 -0.829300 -0.121166  1.1496600 -0.958856  0.8517270 -0.5905860 -0.110834
## 4 -0.840823 -0.674758 -0.0459401  0.461341  0.4481720 -0.0601338  0.250727
## 5  0.448526 -0.321390 -0.3594780 -0.674036 -0.0334604  0.7397040  0.590811
## 6 -5.968770  2.589400 -1.6682400  3.948190 -3.2865500  0.6164350  1.712460
##         PC301     PC302     PC303     PC304      PC305     PC306     PC307
## 1 -0.02857240  0.784914 -0.111594  0.299842  0.1537890  0.257618  0.581506
## 2  0.00664791 -0.299704 -0.365714  0.161070  0.0510703  0.662804  1.036940
## 3  0.48116000  0.622439 -0.207147  0.375918  0.3409570 -0.491765  0.212827
## 4 -0.51127000  0.220587  0.334186 -0.752543 -0.0686399 -0.247551  0.710792
## 5 -0.08005430  0.672211  0.203599  0.177536  0.6002280 -0.438392 -0.123792
## 6 -1.55321000 -2.020450 -1.418340 -0.912601  0.8920440  0.499495  0.719343
##       PC308     PC309     PC310     PC311     PC312     PC313       PC314
## 1 -0.912519  0.131409  0.297420  0.590261 -1.487690 -0.358880  0.11508000
## 2 -0.191389 -0.217103  0.644524  0.341326 -0.309193 -0.240516  0.12306700
## 3  0.656745  0.814825 -0.873942  0.326364 -0.447306  0.226318  0.10180300
## 4 -0.274965  0.646337 -0.385686  0.343867  0.224897  0.442280 -1.00193000
## 5  0.165050 -1.296820  0.112506 -0.108202  0.387461 -0.358981 -0.00209422
## 6 -1.782870  1.209840 -2.716690 -0.935250  0.423990  0.991128 -1.04000000
##        PC315      PC316     PC317      PC318      PC319      PC320     PC321
## 1 -0.1361810  0.3707860  0.516543 -0.2106360  0.0629784 -0.2354070 0.0745866
## 2  0.2838960  0.0905737  0.497427  0.0790915 -0.1036950  0.0608285 0.0557892
## 3  0.2841800  0.4468840  0.175940 -0.2585500 -0.4107850  0.0991483 0.1526120
## 4  0.4180580 -0.3414670  0.120261  0.0687079 -0.3069000  0.1089410 0.2863580
## 5  0.1452500 -0.5249250  0.201887  0.2170580  0.4764040 -0.0473087 0.2351270
## 6  0.0775644  0.1263700 -0.248673 -0.3149170  0.7343540  0.4174750 0.6310480
##         PC322      PC323      PC324      PC325      PC326       PC327
## 1 -0.26463200 -0.0522274  0.0492131 -0.4739130 -0.1011950 -0.02837090
## 2 -0.14297300  0.0105028  0.2550780  0.2915880 -0.1465930 -0.06746470
## 3 -0.27335000  0.1458950  0.6288990  0.1844770  0.0152865  0.10312400
## 4 -0.00865434  0.4698300  0.3435860 -0.1014130  0.1906390  0.00470319
## 5  0.19431100  0.0761079  0.0427247  0.0456039  0.3292730  0.01840060
## 6 -0.04306320 -0.1040430 -0.3736180  0.2089580 -0.2925900 -0.38677800
##         PC328      PC329        PC330 Individual  Pop_City Location Latitude
## 1 -0.06080890 -0.2694990 -1.75322e-06        255 Bengaluru    India  12.9716
## 2  0.09760780 -0.2002560 -1.75322e-06        261 Bengaluru    India  12.9716
## 3 -0.00548772 -0.0353852 -1.75322e-06        256 Bengaluru    India  12.9716
## 4  0.02166250  0.0259966 -1.75322e-06        258 Bengaluru    India  12.9716
## 5 -0.20685700 -0.0353230 -1.75322e-06        265 Bengaluru    India  12.9716
## 6 -0.00352486  0.0335714 -1.75322e-06        262 Bengaluru    India  12.9716
##   Longitude Continent    Year     Region Subregion order order2 orderold
## 1   77.5946      Asia Unknown South Asia              60     52       52
## 2   77.5946      Asia Unknown South Asia              60     52       52
## 3   77.5946      Asia Unknown South Asia              60     52       52
## 4   77.5946      Asia Unknown South Asia              60     52       52
## 5   77.5946      Asia Unknown South Asia              60     52       52
## 6   77.5946      Asia Unknown South Asia              60     52       52

6.1.3 Create PCA plots for Italy, with temporal pops separated

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_native_italy_all_temporal_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 and PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_native_italy_all_temporal_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

6.1.4 Re-do PCA with USA pops included

genotype <- here(
   "euro_global/output/dapc/dapc_italy_all_and_US.vcf"
  )
d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22642
##   column count: 362
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22642
##   Character matrix gt cols: 362
##   skip: 0
##   nrows: 22642
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22642
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC 
##  12  12  13  12  14  12  16   2  12   4   7  12   4  11   4   5   9  12   2   6 
## KAG KAN KAT KLP KUN LAM MAT OKI PAL QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN 
##  12  11   6   4   4   9  12  12  11  11   4   9   3  12   6   6   7  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   353
##  - number of detected loci:      22642
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   353
##  - number of detected loci:      22642
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
## 
## 
##  - number of detected individuals:   353
##  - number of detected loci:      22642
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.lfmm"

PCA for SNP Set 3 for italy_all_US

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          353
##         -L (number of loci)                 22642
##         -K (number of principal components) 353
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/ 
## pca result directory:            dapc_italy_all_and_US.pca/ 
## input file:                      dapc_italy_all_and_US.lfmm 
## eigenvalue file:                 dapc_italy_all_and_US.eigenvalues 
## eigenvector file:                dapc_italy_all_and_US.eigenvectors 
## standard deviation file:         dapc_italy_all_and_US.sdev 
## projection file:                 dapc_italy_all_and_US.projections 
## pcaProject file:                   dapc_italy_all_and_US.pcaProject 
## number of individuals:           353 
## number of loci:                  22642 
## number of principal components:  353 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          353
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Check R symbols for plot

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
  scale_shape_manual(values=good.shapes[1:N]) +
  geom_point()

separate out 1990s Italian pops into own region

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global","lea",  "sampling_loc_italy_all_US_temporal.csv"
    ))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_italy_all_US_temporal.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_all_US_temporal.rds"))
head(sampling_loc)
##     Pop_City  Location Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ       USA 39.79081 -74.92910  Americas          BER 2018
## 2 Palm Beach       USA 26.70560 -80.03640  Americas          PAL 2018
## 3    Brescia   Brescia 45.53373  10.20445    Europe          BRE 1995
## 4     Cesena    Cesena 44.15287  12.24427    Europe          CES 1995
## 5  Desenzano Desenzano 45.46289  10.54914    Europe          DES 1995
## 6    Bologna   Bologna 44.48478  11.36658    Europe          ITB 2017
##           Region   Subregion order order2 orderold
## 1  United States                 1     NA       75
## 2  United States                 3     NA       77
## 3   Italy (1995) West Europe    20     12       12
## 4   Italy (1995) West Europe    24     16       16
## 5   Italy (1995) West Europe    21     13       13
## 6 Italy (modern) West Europe    23     15       15

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 40 Levels: BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 40

Check the regions

unique(sampling_loc$Region)
## [1] "United States"  "Italy (1995)"   "Italy (modern)" "East Asia"     
## [5] "South Asia"     "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population      PC1      PC2      PC3      PC4      PC5      PC6     PC7
## 1        BEN -37.2163 -6.90353  8.16388 1.015140 -2.97139 1.216970 2.13081
## 2        BEN -38.1887 -8.46262  9.77705 1.759510  0.60285 0.718323 3.56887
## 3        BEN -38.9790 -8.01976  8.76755 1.819310 -1.70063 0.935598 3.33746
## 4        BEN -38.0194 -8.22571 10.65540 0.928921  1.01601 1.707930 3.33731
## 5        BEN -37.4027 -7.80192  9.28229 3.267360 -2.53762 1.591770 4.66121
## 6        BEN -37.4843 -8.34769 10.70030 1.298730 -1.92420 0.541393 2.62710
##          PC8     PC9     PC10    PC11      PC12     PC13     PC14    PC15
## 1  0.5684750 2.02544 -7.37037 6.38881  0.520803  7.69074 -3.68301 1.65370
## 2 -0.4230660 2.12577 -8.71463 6.16739  0.829171  7.87555 -5.71411 4.18477
## 3 -0.8433900 3.22544 -7.93577 5.46541  0.356936 10.77630 -4.93038 3.19678
## 4  0.4307510 3.59423 -6.98231 6.08042  3.081570  9.18732 -4.72208 5.23138
## 5  0.9992170 2.46233 -6.41999 6.78189 -0.472345  9.88053 -3.36828 1.86085
## 6 -0.0327903 2.74647 -9.73923 4.67111  0.448524  9.52015 -6.28360 4.53345
##        PC16      PC17      PC18      PC19      PC20      PC21      PC22
## 1  1.057740  0.325190 -2.627180 -1.411280 -0.812558  0.822262 -1.825470
## 2  0.626415 -0.900056 -2.301140  0.190287 -2.100780 -0.682164 -0.916025
## 3 -1.313700 -1.368550  0.318453  0.770833 -0.186347 -1.338750 -1.464200
## 4  0.423200  0.223283 -1.895550 -0.951273  1.353340  0.160229 -3.740640
## 5 -1.240070  0.272152 -2.009790 -1.818460 -0.832699 -1.632900  0.949766
## 6  1.500170 -0.395267 -2.100190 -0.170254  0.542526 -2.972270 -4.421940
##        PC23      PC24      PC25     PC26      PC27    PC28     PC29     PC30
## 1 0.2692770 -0.313204  2.275700 -2.66022  1.861520 4.67830 5.329780  9.38405
## 2 0.7452640  0.349514  0.348397 -4.04510 -0.845730 8.25081 6.409090  9.06703
## 3 0.2266230  1.009580  3.223050 -1.67743 -0.471676 3.85200 0.658986  5.68706
## 4 0.3508680 -1.300530  0.366478 -2.86441 -1.131490 5.12790 0.581550  7.25504
## 5 0.0403922  0.597904 -0.475996 -3.05369  0.221962 5.79812 3.803570  6.04938
## 6 2.9205000  4.985500  5.500240 -1.63308 -0.399567 9.70931 7.013580 11.14980
##        PC31      PC32     PC33      PC34      PC35      PC36      PC37
## 1 -0.590222  2.622320 -3.53950  1.114820 -2.001980  4.409940  0.397127
## 2 -0.468867 -0.410797 -3.82882  2.299420 -5.857600  3.508330 -0.162496
## 3 -2.213030  5.280780 -3.79915  1.054540 -2.789350  5.472000 -0.159691
## 4 -1.352950  0.219452 -1.26560 -0.458891 -0.435605  0.910645  1.595290
## 5 -2.483460  3.640380 -2.92439 -0.730481 -4.511460  4.320720 -1.012690
## 6 -0.934066  5.314380 -6.94339 -0.098423 -7.024860 11.169800 -6.095850
##        PC38      PC39      PC40     PC41      PC42      PC43     PC44     PC45
## 1  0.395983  -9.57939  -7.43704 7.239610  2.504920 -0.759847 -5.22493  3.40859
## 2 -0.352294 -11.82960 -11.19050 9.416400  0.278659 -5.999260 -7.15720  7.44748
## 3 -1.235990  -7.42145  -3.76834 2.079530 -1.904390 -1.981260 -4.74069  4.14637
## 4 -0.166100  -7.98883  -3.14392 4.433250  0.852985 -3.483240 -7.57404  1.96215
## 5 -2.503830  -9.17584  -4.84570 0.468827  2.982720  2.120710 -5.19782  2.03000
## 6 -7.941000 -29.42410  -8.62099 8.761730  1.911680 -1.126970 -7.09627 17.12020
##        PC46      PC47       PC48      PC49      PC50     PC51      PC52
## 1  2.090570  -5.77720  -1.189860  -2.29581  0.318810 -2.08005 -2.191190
## 2  2.152260  -9.49303   0.520355  -3.18648 -3.929510 -1.69894 -2.595630
## 3  3.511120  -4.69953  -3.713010  -4.54833  2.151300 -1.58466  3.035130
## 4  1.916040  -4.73867  -0.566523   1.22846  0.567066 -1.54684  0.428433
## 5  0.602696  -6.50677  -3.278270  -2.82465 -2.895220  2.26975  1.829660
## 6 11.857900 -10.71730 -10.966300 -16.49510 13.759100  5.21100  4.245830
##       PC53     PC54      PC55      PC56      PC57      PC58      PC59
## 1 4.954960 -2.37780  0.896697  5.395320 -1.145590  3.564010  4.524750
## 2 0.727126 -1.18380 -3.342650 -0.150400  5.392310  1.196610 -3.677540
## 3 0.354250 -1.79022 -0.877577  4.954680  0.556605 -0.534725  1.182060
## 4 4.543530 -2.27671  1.616000  1.550080  3.946260  0.625859 -0.564126
## 5 1.885540 -2.71967  0.819113  0.963668 -0.251802  2.420420 -3.017930
## 6 3.951710 -3.01572 -3.935450 -4.748690  0.830369  6.884580  0.388583
##         PC60     PC61      PC62       PC63      PC64      PC65      PC66
## 1 -0.8263280  1.81974  2.431430 -0.0643969   5.55498  1.467660 -1.481830
## 2  4.2603700 -0.50301  1.587340  9.3115500  24.78930 -5.398620 -2.891800
## 3  1.8544800  3.69886 -1.483870  1.6366100   5.74501 -4.580270 -1.837320
## 4 -1.5422500 -1.47563 -0.427616  0.0958617   2.36394  1.228640  0.898035
## 5 -0.0808026 -3.34171 -0.117048  4.0755800   6.48018 -0.246435 -2.077610
## 6  6.6604400  3.12734  0.171965 -9.6483300 -31.62390  8.028320  9.381630
##         PC67      PC68      PC69       PC70      PC71      PC72      PC73
## 1  -4.164680   5.61138   3.83808  -2.128040 -0.238589 -0.193283  2.562090
## 2 -11.437800  15.92480  10.77720   5.812610 -9.094440 -3.195210  4.952080
## 3  -0.726276   4.80709   6.07589   6.022580  3.459730  4.732810 -1.252730
## 4  -9.353560  11.07980   6.58152  -0.119536  1.329850  0.602433  0.284996
## 5  -7.027850   7.80645   4.26135   0.089026 -2.194810  2.416980 -2.026810
## 6  21.644600 -13.80390 -15.37180 -13.442600  9.503160 -7.263210  1.913940
##        PC74      PC75      PC76      PC77      PC78     PC79       PC80
## 1  0.958617   2.73364  0.451171 -1.822620 -0.182561  5.19936   0.509525
## 2 -3.655770   4.69057 -7.160320  0.509124  0.973660  1.24334 -18.565300
## 3 -0.527297  -5.21244 -0.237189 -0.149916 -5.155280  1.92483   7.837730
## 4 -6.093060   6.15596  6.629350 -5.568830  4.620180  3.79086   8.497740
## 5 -2.007370  -1.22402  2.184120 -0.407072  4.365160 -5.47635   3.338510
## 6  4.064630 -11.47350  4.288710  4.533900 -0.786008 -1.17659  -4.751030
##       PC81      PC82       PC83     PC84      PC85      PC86      PC87
## 1  6.00692  4.448750  -0.177065 -6.95261 -2.195040 -6.588350 -0.603661
## 2 -5.45099 -6.672300 -25.049500  5.37501 -3.476640 12.910200  9.164480
## 3  5.36210 -2.170700   1.175030 -1.73433  1.102810 -7.643620 -2.121850
## 4  2.81163  3.213330   3.649690 -3.38869 -1.975100  0.826779 -0.434818
## 5 -1.17468  2.968970   4.821630 -5.96955 -0.746094 -2.057260 -3.501540
## 6 -4.26167  0.785808  -0.277082  1.04392  7.450440 11.629900  1.364960
##        PC88      PC89     PC90      PC91       PC92       PC93     PC94
## 1  5.414430  0.013979 -7.48452  0.604923  -6.464180  -0.227803  9.92392
## 2 -3.135480 10.394300 12.25910  2.177440 -17.264200  10.370700  2.87782
## 3  5.473760  0.921444 -3.14565 -2.906390  10.198800  -1.234940 -3.32567
## 4  4.239260 -2.228360 -3.70182 -4.045790   4.185270 -11.283600 -3.71541
## 5 -0.630829 -6.814160  2.20122 -0.038764  -4.362270  -7.512580 -1.73986
## 6 -3.797450  3.386640  5.68336 -1.140120   0.343172   3.261860  2.55321
##       PC95      PC96       PC97     PC98      PC99    PC100    PC101     PC102
## 1  9.26586 -5.553240   6.405000  7.71460  1.578660  3.86968 -4.16344 -4.629450
## 2 -2.91785 -3.076880   1.792370 -8.33705 -0.705961 -3.03898 -3.44964 -1.440070
## 3  1.46641  4.765990  -0.495241 -3.93044  4.082810 -1.04107  1.32525 -1.015940
## 4  4.88287  1.909630 -13.020100 12.89230  4.879510  4.60853 -1.34667  1.444590
## 5 -4.95794  0.840153  -2.639320 16.81240 -0.852836  2.29255 -2.58181 -4.006070
## 6  5.63180 -1.502850   1.442360 -1.49457  1.237340  1.95710  7.42489  0.815122
##       PC103       PC104    PC105    PC106    PC107    PC108     PC109     PC110
## 1  9.847060   4.8570900 -1.49122 -3.16346  8.35051 -7.86179 -4.939470 -14.34130
## 2 -5.080550  -1.6806900  3.14699  3.41234 -5.48929  5.30144  4.239760   4.70956
## 3 -5.939040 -13.0688000 -4.61475  5.21346  5.81333 -6.32581 -5.191790   3.16850
## 4 10.532900  -2.8967000  4.32134 -9.25436 -7.55576  2.95399  0.543659   8.26517
## 5  2.880730   2.7698900  2.40714  2.61896  6.88885 -6.74691 -1.607330  -2.02634
## 6 -0.813424  -0.0401658 -3.34307  2.14064  0.32628  1.01315  2.053410   1.38524
##       PC111     PC112     PC113     PC114      PC115     PC116     PC117
## 1  2.813680 -1.215830 -4.616600 -12.32540  1.4200400 -3.335950 -2.194070
## 2 -3.656750  2.254870 -4.340980   1.05799  0.9666860  5.049780 -6.964390
## 3  1.780190 12.550200  8.518010  -6.79152 -6.4443700 -0.924222 -2.078180
## 4  3.564250 -7.616980  2.356720   1.48668  1.5370100 12.315200  1.249730
## 5 -2.931870  1.525940 -0.646879 -10.68990 -0.9625750 -8.422430  7.790970
## 6 -0.892481 -0.754794  0.484680   1.77637 -0.0906339 -2.070330 -0.843481
##        PC118    PC119    PC120     PC121     PC122     PC123     PC124
## 1  -1.517170 11.09280 -1.07037  5.999500 -12.01400 -2.401110  3.324020
## 2   1.095350 -2.07567 -2.52639  0.986412   1.93845  2.775770 -0.389459
## 3 -10.150900 -7.12411  3.62448 -4.445940   4.95399 -3.153290  9.177360
## 4   5.372170  3.15508  0.14191 -5.484870  -2.75911 -0.381302 11.563700
## 5  -1.834240 -2.59692  2.84946 -5.990300   3.67164 -7.873220 -5.824640
## 6   0.200012  2.03898  2.95781 -1.131490   1.93482 -2.615500 -2.845660
##       PC125     PC126     PC127     PC128     PC129     PC130    PC131
## 1  5.819390 -0.184176  -9.95533 -7.791950  0.222528 -7.367430 -3.82414
## 2 -4.275520 -0.156791   4.53326  2.181980 -3.652290 -6.869180  5.46631
## 3 -0.651213 -1.687460 -11.28530 -3.282510 11.855900 12.340000 -2.63842
## 4  0.448775 -4.826850   3.99225  0.146227  4.174790 -3.065300 17.56640
## 5 -2.044640 -8.998780 -11.52720 -2.180850 -5.849920  2.257200 -3.70467
## 6  1.043160 -0.170034   1.60509 -1.560590 -1.995800 -0.411025  2.51800
##       PC132     PC133     PC134     PC135    PC136      PC137    PC138
## 1 -1.517100   2.02710 13.197500  6.910530  9.58284   1.504540  4.62906
## 2  0.458472   5.06087 -3.139790 -1.852510 -3.16872   0.101584 -0.57667
## 3 10.638600  -1.48781  0.696223  0.476868  7.76636   6.271820 -6.82674
## 4 -7.274210   3.05790  7.581370 -2.686990 14.64790 -10.996300  2.58538
## 5 -0.836249 -13.62470  5.703340  3.110320  5.70559  -2.291810  6.24592
## 6  0.320501   2.33693  2.097010  2.409500 -2.71610  -1.073870  1.74500
##       PC139     PC140      PC141     PC142     PC143    PC144    PC145
## 1 -4.872290 -1.291600  -1.870690  4.475900  -2.00113 10.11170  1.32238
## 2 -0.874231 -0.611146   7.191280  0.649931   2.33304  6.18937 -1.19040
## 3  1.443740 11.043400  -0.293550 -6.267920  16.97040 -8.07867 -6.61166
## 4 -5.814640 -2.796800 -21.454500 -2.731940   4.55432  2.23595 -0.85853
## 5 11.926800  3.954420  -7.137120  1.038880 -13.61840  2.54991  7.13757
## 6 -1.090150  0.526904   0.864949 -0.346388   1.39443  0.45065  1.35301
##       PC146      PC147      PC148    PC149      PC150      PC151    PC152
## 1 -1.836910 -2.6950000  0.9994530 -3.72830 -12.221500   5.772740 -1.73444
## 2 -1.120740 -4.1102800  0.0942148 -1.55061   3.695220   0.553763 -1.71085
## 3  7.738200 -0.0127532 -0.4983650  2.25453   0.294455  11.570100 -4.45945
## 4  5.118430  1.7560400  1.0225900  6.86183   6.374410 -13.516400  5.90705
## 5 -4.475320 -0.8183350  7.3425100 -4.87818  -5.551660   3.412620  1.08903
## 6  0.369585 -1.2836200  0.9649440 -3.31766   0.602201   1.625320  1.74354
##       PC153     PC154     PC155     PC156     PC157     PC158    PC159
## 1 -17.90740 -5.202690  5.948770  4.352760  -7.03850  3.721990 -2.53728
## 2   4.57882 -2.035130  1.097780 -1.403490  -2.30925 -0.547093 -5.01209
## 3  -2.70191 -0.463896 -5.194520 11.143000 -16.59010  6.087970  8.52568
## 4  -5.39607 -1.034730 13.884800 -1.188220   1.31077  9.279310  6.27999
## 5   1.52049 10.034000 -0.660857 -9.585580  -3.52619 -8.759880 10.72900
## 6   3.28944  1.946040 -1.082770 -0.161758   2.02381  1.520940 -1.02833
##       PC160    PC161      PC162     PC163     PC164      PC165     PC166
## 1  9.286490 -9.80941  0.0634464  3.571930 -6.132830   4.235850  3.480150
## 2  1.236520 -2.21464 -7.0336700 -1.324130  1.166360   2.326180  0.219571
## 3  0.248725  3.63218  2.1174100  9.625760 -1.044560 -10.892000  3.389200
## 4 -2.620920  4.13362  7.7879300  5.602260  7.883490   7.553580 -3.489980
## 5  8.114180 -6.67751 -2.6727900 10.302600  8.936990   3.837210  9.753110
## 6  1.227530  1.03112  2.1029300  0.466882 -0.194435  -0.195545 -2.296970
##        PC167    PC168     PC169      PC170     PC171    PC172    PC173
## 1 -13.885100  2.28453  15.83310  -5.623020  -5.10345 11.89280 -3.83828
## 2  -1.301960  1.23902  -2.80288   1.054320  -1.04918 -1.81956 -3.33645
## 3   7.104330 -1.00865  -1.28397  -4.899100 -10.42260  5.35418 -6.82687
## 4  12.771300 -1.97726   9.16515  -5.071190  16.88840 -6.48527 10.52040
## 5   4.548820  2.43470 -16.38980 -19.603700  -4.55623 -5.94214 -6.49890
## 6   0.225724 -2.11463  -1.58292   0.135736  -1.70491 -1.45946 -1.01384
##       PC174      PC175      PC176     PC177     PC178    PC179     PC180
## 1  4.118520   8.078020  5.5310600  6.064530  6.878710  5.79570  -5.64530
## 2 -3.883720   0.636334  0.7006900 -0.405249 -2.239920 -1.10821  -1.59937
## 3  5.880920   9.774880 14.9522000  1.395240  7.712630 -4.14718   3.70241
## 4 -4.617630 -10.420200  9.6121900 12.939200  0.630583 -6.50927   5.83060
## 5 -0.427483  -5.076660 -1.6409300  2.821980 -7.651990  9.41352 -10.88510
## 6 -2.940440  -1.401290  0.0991526  2.884440  1.868630 -2.89713   2.92469
##        PC181      PC182      PC183    PC184    PC185     PC186      PC187
## 1  -4.232930 -10.011700  -6.900140 -7.28992 -8.52278 -9.221240 -15.849700
## 2   0.367756  -0.723134   2.786030  2.18826  5.27580  1.031870   0.758629
## 3 -12.397500   3.606680  -1.200270  4.70236 -2.17793 -1.670930  15.494500
## 4   8.520050  -2.173560   0.606372  7.70590  4.36108 -0.644644   7.131180
## 5  -6.071170  -7.321510 -17.684600  4.75439 14.73380 -4.374830   3.541290
## 6   1.631840  -1.592960  -0.082407  2.27033  3.60718 -0.237748  -1.583130
##       PC188      PC189     PC190     PC191     PC192     PC193     PC194
## 1  1.858910 -14.540400  7.463540  -3.40255  -7.90779 -5.510220  2.836420
## 2  0.842773   1.966160 -0.766134   6.77392  -1.67406 -0.471702  3.168310
## 3 -5.628550  -3.069020 -6.544690   1.29123 -18.02510 -6.845210 18.699600
## 4  2.896110   0.170743 -2.430260 -12.04440  -9.58860  3.117910 -3.227580
## 5 -5.221950   2.308260 23.749200   1.19344  -6.49064 24.275400 -6.881190
## 6 -1.254430  -2.052290  0.536871   2.47689   3.41933 -1.140760  0.470326
##       PC195       PC196      PC197      PC198    PC199    PC200      PC201
## 1  -5.35120   1.8103600   6.842580   6.201950  6.92850 -5.49036   7.270970
## 2   3.00008   0.5952670   1.789050  -0.458206  1.15611 -3.66120  -1.524690
## 3   5.67847 -18.1178000   2.453550   1.633080 -3.38287  6.39387 -11.282700
## 4   7.48080   0.1467490   8.882330  -2.151750  1.86752 10.29380  -3.301030
## 5 -15.42870  -0.0696018 -10.162800 -15.774900 12.47940  1.10893 -12.733700
## 6  -0.18008   0.7341080   0.862116   1.880360 -1.91350 -1.12129   0.238685
##        PC202     PC203      PC204     PC205      PC206     PC207     PC208
## 1 -12.462000 -4.778000 -18.035900 -0.532531 -9.1278400  9.715630 -0.490794
## 2  -1.121240  3.152790   4.485210 -1.588600  2.6936100 -0.265028  3.652950
## 3   6.566590  3.958870   0.279617  7.692290  3.5672300 -6.330670 -0.627159
## 4   3.758650 -2.192670   6.208360  2.795050 -2.1705800 15.685800 12.157100
## 5  12.713200  2.427950   4.088340  7.799830  2.9429500 -8.044550 -4.305950
## 6   0.468914 -0.850908   1.368320  1.401280 -0.0735814 -0.125449  1.217980
##       PC209    PC210      PC211    PC212     PC213    PC214     PC215     PC216
## 1 -3.513740  3.54397   0.387571 -8.82160 -5.607230 -7.51641   9.35403  5.879680
## 2 -0.547561  2.43223  -0.115685  2.52513  3.542480  1.53285  -3.91949 -2.502880
## 3 13.251000  8.07358 -12.135400 10.15140  6.645870  5.15132  22.14520 -0.149686
## 4 14.197100  7.63045  13.660500 -7.23077 14.946300  3.46075   1.00408 -8.565910
## 5 -3.101940  4.10934  -6.322110  1.98161 -5.087440  3.28440 -12.12970 -0.572817
## 6  0.898895 -3.03037  -0.278060  2.67619 -0.484659  1.74179   2.35686 -1.467420
##       PC217     PC218     PC219     PC220      PC221     PC222    PC223
## 1  19.26480 -3.137400 11.071400 12.870400  0.6398130  15.06190 -7.59028
## 2   5.22980  0.542928 -0.760684 -1.837190 -3.5223500  -3.32979 -2.54436
## 3  -4.02030 -4.666050  5.202530 -3.887920 -3.9502300  -3.45665 -4.47217
## 4   8.56317  6.649540  3.872890 -0.284108  8.4878100 -13.31170  7.53663
## 5 -11.88450 -8.144070 -0.525392  8.363680  7.0502000  -0.24391  7.37316
## 6   1.61979  1.787520 -3.385790  0.427645 -0.0499479  -0.25317 -1.00382
##        PC224     PC225     PC226     PC227     PC228    PC229     PC230
## 1  -8.084790 -11.51260 -3.937810   5.87411 -5.462010  2.14933  1.695830
## 2  -5.009810  -4.07868  2.953850   2.36136 -3.607180  2.04615 -0.831913
## 3   0.973225  -3.51610 -9.974220 -15.96100 -2.225650 -5.37068 -3.130120
## 4 -13.264000   8.38465 -4.163090   7.01938 -0.106498  1.82626  5.171280
## 5   3.195760   7.03553  4.082500   4.38256  3.189240 -3.53044  4.546750
## 6  -0.613000   4.03901 -0.884451   3.22195  2.753130  1.76970  0.466559
##       PC231     PC232     PC233     PC234     PC235     PC236      PC237
## 1  1.553860  4.782800 -2.749260 -1.755180  2.863740  3.936750  6.0190400
## 2 -0.554079 -2.683960 -2.205200  2.327890 -0.794698 -2.039530 -4.7288600
## 3 -1.777080 -7.009510  3.806610  6.175080 -0.279089 -2.644020 -7.5750100
## 4  1.190240  3.605350  4.879270 -0.953465 -5.975150  8.113830 -3.1210700
## 5 -3.606990  5.225810  2.335190 -1.474530  0.259147 -0.596752 -4.3258300
## 6  0.567009 -0.536693  0.116808 -1.449060 -1.321010 -0.473417 -0.0846443
##       PC238    PC239    PC240      PC241     PC242     PC243    PC244    PC245
## 1 -1.404510  5.60743  8.38844 -10.578200 -3.051890  5.500160  2.06766  1.14805
## 2  5.396560 -3.71861 -1.26536   2.768260 -4.888940 -3.971290 -2.00073 -1.30643
## 3 -4.153380 -1.31689  5.67011  -1.000720  7.487610 -1.052430  1.62645 -3.22069
## 4  1.788340 -8.19962 -3.20668   2.451300 -6.313990 -0.160164  5.35240  2.39894
## 5  0.927021  6.15589 -1.15607   2.413450 -1.357190 -5.044420 -4.46584  1.77346
## 6 -1.182870 -1.12245 -2.62583  -0.753323 -0.798388 -0.989399 -1.66093  2.58336
##       PC246     PC247    PC248     PC249    PC250     PC251      PC252
## 1  3.767430 -2.647600  3.49967 -4.310880  5.14785 -4.961910  0.8333450
## 2 -2.770960 -2.242400  7.05074 -4.556240  4.37798  2.057740 -2.7946600
## 3 -5.211310 -1.750010 -3.45672  4.118110 -3.59872  0.185661  0.1621490
## 4  2.353740 -0.527736 -2.51637  3.154160  3.15777  4.508400 -1.3894500
## 5 -4.113550 -2.481920  2.19272 -0.951083 -2.42434 -0.968350 -0.0275769
## 6 -0.427994 -1.246580  1.75902 -0.277301 -2.47773 -1.489000 -0.6051430
##       PC253    PC254     PC255     PC256     PC257     PC258     PC259
## 1  1.854340  3.13650 -0.664829  0.693134  3.361710  1.045370  2.326450
## 2 -0.637841  1.42913  2.944340  4.867670  0.522718 -0.724822  9.524000
## 3 -3.735260  1.58096 -1.438430 -0.637720  3.342480 -1.603180 -1.239580
## 4  7.226070 -2.70647 -3.467850  0.287083 -2.452230  3.241980 -0.614516
## 5 -2.384700 -1.79120 -1.706680  1.994750 -3.487750 -0.333868  0.502889
## 6  1.605310  2.65217 -1.368280  0.835537  1.831050 -1.047120 -1.092050
##       PC260       PC261    PC262     PC263     PC264     PC265     PC266
## 1 -2.777920 -1.95322000 -2.07877  3.195600  1.345990  0.275427 -2.559010
## 2  7.989790  1.62617000 -7.60646  0.835935 -9.781800  0.590346 15.608200
## 3  3.265430  0.44738300  0.48502  1.386520  0.846490  1.482330  1.789920
## 4 -3.071270  1.70066000  1.04902  1.217850 -2.983800 -1.021330  1.205720
## 5  0.902227  0.00190388 -2.03280 -1.769550 -0.307306  0.582840 -0.509289
## 6  1.204670 -2.69179000  1.10412  0.537858  1.179470  1.118340 -0.719916
##        PC267     PC268     PC269     PC270      PC271     PC272     PC273
## 1  -2.143000  0.863042  2.778440  0.136910  -1.467990 -0.711070 -0.601976
## 2 -20.080400 27.223100 -6.868710  4.004540 -12.531500  4.774630  4.627560
## 3  -3.104600  2.450220  0.212684  0.821071  -2.337130 -0.119570  1.141260
## 4   0.916439 -3.144310 -0.736625 -1.191730  -2.186240 -0.803661 -1.491480
## 5  -0.646843 -0.637993  1.081910 -0.639125   0.359545 -0.338795  1.158290
## 6   2.016850 -0.178199  0.759125 -2.812540  -0.594396 -1.094580 -0.428685
##        PC274      PC275      PC276    PC277      PC278      PC279     PC280
## 1  1.4780100   0.823670  -1.714060 -0.67909 -1.5034000  4.8798000  1.340700
## 2  0.3553940 -13.032400 -14.587000  4.76811 -6.6642900 10.7046000 -6.946140
## 3 -2.3262700   0.184944   2.053030 -3.68246 -0.0973468 -0.0198046 -0.174498
## 4  0.7328620   2.012800  -0.843853  2.44489 -1.3144300 -0.4585440  0.346058
## 5 -0.2701790  -1.378740   0.557797  1.05570 -0.7874850 -1.6605900 -0.342479
## 6 -0.0309142   0.667012   0.401887  2.09692  0.1748000  0.0920300  0.182606
##         PC281     PC282      PC283     PC284     PC285     PC286      PC287
## 1  0.73774800 -0.219410 -0.5817050 -0.914899  0.552053 -0.749923 -0.0673759
## 2 -6.68965000 -5.031680  5.1597100 -2.424530 -2.720410 -2.685180 -0.4999390
## 3 -0.00611086  0.374657 -2.0810500 -2.684240 -2.272230 -2.613870 -1.1641700
## 4  0.97405400  1.765050  0.2351030 -0.813811 -0.250675 -0.863614 -0.1390070
## 5 -3.11698000 -0.793641 -1.4055100  1.493910 -0.186053 -1.002550  0.1591750
## 6 -0.78684400 -1.379630  0.0313976 -1.351390 -1.358130 -0.467317  1.1953000
##        PC288      PC289     PC290    PC291     PC292      PC293      PC294
## 1  0.0877016  0.0127312 -0.525059 -1.15551  0.971415 -0.0652187  2.0796200
## 2  0.7871490  3.4397300 -0.929990 -5.32491 -4.162390 -2.0194400  1.5909600
## 3  0.8057580 -1.3788600  0.852070 -1.61029  1.228690  0.5189210  1.0591500
## 4 -0.2088700 -1.8083600  1.809490 -1.00467 -1.324430  0.3877700 -0.0342288
## 5 -0.4940000  1.3250200 -2.141980 -1.87450  0.797641  0.9049020  1.4598000
## 6 -1.8641400  2.1354400 -1.734800  2.29779 -7.476230 -3.6850200 -0.9018080
##       PC295      PC296      PC297      PC298      PC299     PC300     PC301
## 1  1.071730  1.3868200  0.0542372 -0.4815720  0.0264312 -1.983140  0.274870
## 2 -1.104370 -2.7377700 -1.1016000 -4.1093200 -2.5193700 -0.312920 -1.207590
## 3 -2.240230  0.3100860 -0.3687790 -1.0236800 -0.2783530 -1.164010 -0.369251
## 4 -0.101127  0.7541810  0.0470820 -1.6237800  0.8645190  0.181799 -0.813301
## 5 -0.410506  0.0671651 -1.9652200 -0.0239154  0.1945640  1.038650 -0.250763
## 6 -0.275706 -2.2459900 -7.3447300  2.1935900  8.0920400  1.262790 -5.090690
##       PC302     PC303      PC304      PC305     PC306       PC307      PC308
## 1 -1.458250  0.709720 -0.0421059   0.407448 0.7782790  -0.7052840  2.2297900
## 2  0.725306  1.298510 -1.8290600  -4.483180 1.4911100  -2.0684600  2.0522900
## 3  0.122073 -0.604087  0.5332070   1.050690 0.7760090   0.0206425  0.0605125
## 4  0.476817 -1.173360 -0.4931780   0.412802 0.0402457  -0.8641810  0.7106590
## 5 -0.781629  0.257777 -0.0882721   0.395462 0.3311180  -0.8031150 -0.8485220
## 6 -1.147150 -2.120710 -9.1826800 -10.265700 5.8447000 -13.4441000  0.0841197
##       PC309      PC310     PC311     PC312     PC313     PC314     PC315
## 1 -0.267768 -0.8472460 -0.314386 -0.986070  0.553302  0.101887  0.292182
## 2 -1.573330  0.5858130  1.393560  1.970490 -0.465429  2.210850 -0.145276
## 3  0.300007 -0.0108822 -0.100460 -0.327450  1.493130 -0.710889 -0.210780
## 4 -0.689473  0.5151830 -0.599547 -0.048379 -0.942814  0.947402  0.224050
## 5  0.270386 -1.5381500 -1.361500 -0.536435  0.384209  0.242467 -0.828820
## 6 36.385600 13.4505000  8.969700 -3.973680  3.632820  2.349070 -1.537220
##        PC316     PC317     PC318     PC319       PC320      PC321     PC322
## 1 -0.0578941  0.925388  0.612744 -0.511085  0.74431400  0.2642700  0.573491
## 2  1.2308400  0.763512  0.197456  1.483330 -0.02875870 -0.0537602 -0.802819
## 3 -0.7862020 -0.946648 -1.213510  0.452176  0.02074090  0.4314920 -0.409122
## 4 -0.1229940 -0.436630  0.167505 -0.648077  0.76464900 -0.0972988 -0.766162
## 5  0.3399550  0.436367 -1.076140 -1.207180 -0.00981003  0.0539852  0.526952
## 6  2.0425900  3.318330  3.219610 -0.500412  1.61449000 -1.7660700  2.150020
##       PC323      PC324     PC325      PC326     PC327      PC328     PC329
## 1 -0.996313  0.2486230 -0.573440  0.3714150  0.234298  0.6489650  0.303600
## 2 -0.493265 -0.0437834 -0.176258  0.3449410  0.481877 -0.7397780 -0.228351
## 3 -0.223398 -0.6961550 -0.299019 -0.6208470  0.113228  0.5344440  0.968375
## 4 -0.130816 -0.2181390 -0.514174 -0.0958358 -0.546446  0.1743940 -1.256220
## 5 -0.150911 -0.0108711 -0.221861  0.4702230  1.031290 -0.0843454 -0.547098
## 6 -1.039850  0.8745920 -0.942692  0.3012320  0.716169 -1.5554400  1.768970
##       PC330      PC331     PC332      PC333     PC334    PC335      PC336
## 1 -0.118850  0.2704280 -0.184674 -0.0537026  0.268162 0.532530 -0.6034540
## 2  0.797714 -0.1556930 -0.525773  1.2804600 -0.354151 0.941927 -0.0285140
## 3 -0.538072  0.3225270 -0.345629  0.2606370  0.196721 0.426817 -0.0717353
## 4 -0.215704  0.0500965  0.153409 -0.3820600 -0.259605 0.134653 -0.0705485
## 5  0.566725 -0.1695040 -0.310048  0.2401800 -0.253752 0.357621 -0.0620567
## 6 -0.699025  1.7625100  0.972912 -0.7788220  0.965009 1.320090  1.2295900
##        PC337      PC338      PC339     PC340      PC341      PC342      PC343
## 1 -0.6487000 -0.3366380 -0.1954530  0.150603 -0.0493815 -0.3354550  0.1317770
## 2  0.1047380 -0.3295320 -0.3829760 -0.272557  0.1843130 -0.0238051 -0.1792370
## 3  0.4291140 -0.4008970 -0.0405974 -0.346916 -0.0321387  0.3521720  0.1974460
## 4  0.0210873  0.6379810  0.3861810 -0.355886 -0.2158510 -0.5362330 -0.5423710
## 5  0.4152080 -0.0417838 -0.2362560 -0.242030 -0.2350190  0.0032567  0.0690464
## 6  0.3178510 -0.0229162  0.7649860 -0.148682  0.3228920 -0.6665610  0.0555563
##        PC344      PC345      PC346      PC347      PC348      PC349      PC350
## 1  0.1981880 -0.3480010  0.1173250 -0.0416354  0.0267634 -0.3073740 -0.2022120
## 2 -0.0380013 -0.1843660 -0.1410460  0.0572115  0.4692510 -0.1033020 -0.0375376
## 3 -0.0204594 -0.2057300 -0.1673220 -0.6778110  0.0527420  0.0355308  0.0356868
## 4 -0.2253400  0.0426583  0.1013450 -0.2063460  0.2040370  0.2990810 -0.0189545
## 5  0.0388593 -0.0861736 -0.0923959 -0.3225470 -0.2266730 -0.1478300 -0.0452171
## 6 -0.4401270 -0.1043480  0.0843801  0.2537730 -0.2020580 -0.2614700 -0.3745610
##         PC351      PC352       PC353 Individual  Pop_City Location Latitude
## 1  0.00836228 -0.0332632 2.42165e-07        266 Bengaluru    India  12.9716
## 2 -0.07021830  0.2772360 2.42165e-07        255 Bengaluru    India  12.9716
## 3 -0.02586280  0.0257898 2.42165e-07        256 Bengaluru    India  12.9716
## 4 -0.21985700  0.0395047 2.42165e-07        265 Bengaluru    India  12.9716
## 5  0.08758560  0.1838020 2.42165e-07        261 Bengaluru    India  12.9716
## 6 -0.01765120 -0.0508445 2.42165e-07        262 Bengaluru    India  12.9716
##   Longitude Continent    Year     Region Subregion order order2 orderold
## 1   77.5946      Asia Unknown South Asia              60     52       52
## 2   77.5946      Asia Unknown South Asia              60     52       52
## 3   77.5946      Asia Unknown South Asia              60     52       52
## 4   77.5946      Asia Unknown South Asia              60     52       52
## 5   77.5946      Asia Unknown South Asia              60     52       52
## 6   77.5946      Asia Unknown South Asia              60     52       52

6.1.4.1 Create PCA plots for Italy All + US

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "PCA_lea_italy_all_US_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "PCA_lea_italy_all_US_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.2 PCA for Greece & Albania & Croatia (MAF 1%, R2<0.01 snp set)

6.2.1 Import the data for SNP Set 3 subset for euro_native2_albania_croatia_greece_US

genotype <- here(
   "euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 323
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 323
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ INW JAF KAC KAG KAN 
##  10  12  12  12  12  12  12   2  11  10  12   4   7  12  11   4   2   6  12  11 
## KAT KLP KUN LAM MAT OKI PAL QNC SON SSK SUF SUU TAI TIR UTS YUN 
##   6   4   4   9  12  12  11  11   3  12   6   6   7   4  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   314
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   314
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
## 
## 
##  - number of detected individuals:   314
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.lfmm"

PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          314
##         -L (number of loci)                 22537
##         -K (number of principal components) 314
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            euro_native2_albania_croatia_greece_US.pca/ 
## input file:                      euro_native2_albania_croatia_greece_US.lfmm 
## eigenvalue file:                 euro_native2_albania_croatia_greece_US.eigenvalues 
## eigenvector file:                euro_native2_albania_croatia_greece_US.eigenvectors 
## standard deviation file:         euro_native2_albania_croatia_greece_US.sdev 
## projection file:                 euro_native2_albania_croatia_greece_US.projections 
## pcaProject file:                   euro_native2_albania_croatia_greece_US.pcaProject 
## number of individuals:           314 
## number of loci:                  22537 
## number of principal components:  314 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          314
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_albania_croatia_greece_US.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_albania_croatia_greece_US.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_albania_croatia_greece_US.rds"))
head(sampling_loc)
##     Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ      USA 39.79081 -74.92910  Americas          BER 2018
## 2 Palm Beach      USA 26.70560 -80.03640  Americas          PAL 2018
## 3  Dubrovnik  Croatia 42.60654  18.22661    Europe          CRO 2017
## 4      Vlore  Albania 40.46600  19.48970    Europe          ALV 2020
## 5     Durres  Albania 41.29704  19.50373    Europe          ALD 2018
## 6     Tirana  Albania 41.31473  19.83172    Europe          TIR 2017
##            Region   Subregion order order2 orderold
## 1   North America                 1     NA       75
## 2   North America                 3     NA       77
## 3 Southern Europe East Europe    31     23       23
## 4 Southern Europe East Europe    32     24       24
## 5 Southern Europe East Europe    33     25       25
## 6 Southern Europe East Europe    34     26       26

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 36 Levels: ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 36

Check the regions

unique(sampling_loc$Region)
## [1] "North America"   "Southern Europe" "East Asia"       "South Asia"     
## [5] "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1      PC2      PC3      PC4        PC5      PC6       PC7
## 1        ALD 33.6545 1.847770 -30.1859 11.95910 -4.4866900 3.505150  0.860564
## 2        ALD 34.8737 2.012080 -33.8088 15.53010 -1.5636900 4.878090  1.074120
## 3        ALD 34.2735 1.827370 -33.3481 13.08000 -4.3894700 3.894920  3.086600
## 4        ALD 35.3477 1.389580 -33.3868 13.39940 -1.2816600 0.604832  0.331101
## 5        ALD 31.9907 0.623082 -27.3660  9.80694 -0.0119409 1.703960 -0.403977
## 6        ALD 32.9900 0.373753 -35.7319 11.88800 -0.3668650 2.408060  0.810319
##         PC8       PC9     PC10     PC11      PC12      PC13      PC14     PC15
## 1 -1.294480 -15.72440 10.57200 -7.57900  0.306803  1.626140  -7.51101 31.01680
## 2 -1.152290 -19.12860 11.94860 -7.17698 -2.854640  3.366370  -8.22972 19.83230
## 3 -1.591870 -15.82210 11.37940 -6.61994 -1.406800  1.335060  -6.13617 12.04520
## 4 -0.371745  -2.61902 -4.47102 -2.13649 -0.800955 -0.755948   1.60279 -4.44627
## 5  0.339965 -11.02060  5.71820  2.04755 -1.004190  2.191210  -5.82602 17.14610
## 6 -2.613870 -13.67560  6.68720 -4.27629 -2.585940 -4.477190 -11.55440 22.37860
##       PC16      PC17     PC18       PC19      PC20     PC21     PC22      PC23
## 1 -2.48287 -4.706000 -6.46377  -7.771850 -13.96380 -9.27508  4.08026   4.49743
## 2 -3.93641 -2.762380 -2.79927  -2.907130 -14.86400 -8.93777 10.41130  12.39160
## 3 -4.35073  0.873146 -5.22129  -1.610730 -12.05890 -7.13241  7.42712  11.57910
## 4  1.08533  1.155160  3.67332 -12.226500 -10.59080 -6.95033  4.71994   4.02882
## 5 -5.18930  0.374034 -3.66742   0.169997  -5.76635 -9.85443  6.20592   2.31409
## 6 -5.32986  1.919290  3.33214  42.968000  32.35080 11.49690 -7.52173 -16.05490
##         PC24      PC25     PC26     PC27     PC28      PC29      PC30      PC31
## 1  -7.941060  -8.91267 -2.05442  2.88808  1.55396  -1.53999  -1.64546  5.004950
## 2  -7.863100  -7.74425  2.14457  9.39956 -7.13180  -8.22522  -5.38359  6.276240
## 3 -11.627100 -11.28060 -2.42858  9.48418 -7.62069 -14.40570 -11.99440 11.077000
## 4  -0.419253  -6.92549  5.46390  6.14941 -0.75930   3.93828  -3.14222  0.856983
## 5 -17.710400  -1.36983 -4.04200 -4.07820  9.49657   1.08740   1.60221 -1.521270
## 6   3.340100  14.53330 -4.38522  9.39643 14.73800   6.23730  -7.50660  0.646253
##       PC32     PC33      PC34      PC35      PC36      PC37       PC38
## 1  2.73399  2.12740  0.992012 -2.343770  -8.68481  0.830522   0.514518
## 2  4.07887  7.24171  4.992420 -1.836340  -4.50153 -0.606885  -0.540377
## 3 -0.32891 13.11460  2.855670 -7.541280  -2.87208  2.869660  -3.359010
## 4 10.13540  2.48688  4.170620 -0.426741 -11.20110 -4.764150 -11.483700
## 5  3.45475  4.04732 -6.953400 -7.382410  14.54970  3.170570   2.209260
## 6 -1.06384 -9.06869 14.345700  2.146430 -24.29830  1.265940  -0.156519
##         PC39     PC40      PC41       PC42     PC43     PC44     PC45     PC46
## 1   4.878880 10.95370  0.911399  3.0333100 -1.27587 1.266400  6.08136 -2.20252
## 2  -0.401524  4.16874 -0.413553  3.2211400  4.31395 2.161870  1.76064 -2.25617
## 3   3.518950  4.13668 -5.858940 -0.0632954 -4.99706 5.550840 -3.20248 -1.21974
## 4  -5.583580  1.15464  3.412100 -5.8628200 -2.12944 9.625950 -2.92786 -6.79578
## 5   4.876490  4.13275 -6.373340  5.3835500 -5.84265 0.134066 13.88050  2.10961
## 6 -12.620200  3.76460 -5.818740  6.6802200  6.92779 0.182684 -1.45777 -5.49437
##        PC47      PC48      PC49      PC50      PC51     PC52       PC53
## 1  0.715354 -4.884480   9.46724  -5.49459  1.245290  4.96518  5.8071400
## 2  2.666250 -1.928840  -6.92761   5.64964 -0.435540 -6.20389  0.5962470
## 3 -2.249120 -2.250060 -13.90950   3.60353  4.554800 -6.27148  0.0720639
## 4 20.923900 -2.710540   5.72659  -7.00088 -0.291436 -3.92261  5.1775100
## 5  0.893004  0.369954   8.99940 -17.95580 -7.204090  2.28608 16.4855000
## 6 -6.365360 -0.960018  10.47180   5.38179 -3.231840  2.33383  5.5777200
##        PC54     PC55      PC56     PC57     PC58      PC59       PC60     PC61
## 1   6.13875 -2.78973  6.917520  3.64115  4.61293  1.538960  4.0828200 -2.77608
## 2  -5.50915  1.25105  5.983920  4.43292 12.86350 -3.601760  1.0253100 -8.86598
## 3 -19.74750  3.54473 19.259600  6.42519 14.73150 -1.314380 -3.5901200 -8.25783
## 4   6.29928 12.13010 -0.814202 -5.85231  1.87534 22.005700  3.3382100 14.37180
## 5   8.48926 -5.37201 -7.356940 -3.75368  9.53201 -0.890918 -4.8544300 -7.65264
## 6   0.94694 -2.19742 -3.469640 -2.18860 -3.81891  2.898470  0.0619372 -7.63990
##       PC62       PC63      PC64      PC65      PC66      PC67      PC68
## 1 -7.49693   5.356770  1.414720   6.83071   1.72852 -2.965640 -2.183760
## 2 -2.24644  -0.672646  4.764640   1.77717  -4.59098 -4.130500 -0.918205
## 3  5.13453 -12.671400  5.495980   1.25184   7.66209 -9.235030  1.829610
## 4 -9.58395   8.561870 -5.654850 -15.71400 -11.09880 -3.875670  1.085360
## 5 -1.19577   2.130770 -9.225300  -2.20116  -2.03796 -0.723815 11.654900
## 6 -5.13762   5.392660 -0.120972  -0.18269  -6.98170  1.719570  2.028280
##        PC69      PC70      PC71       PC72     PC73     PC74       PC75
## 1  1.192830  3.766200  0.622794   5.065640 -4.80467  2.94089  -3.245190
## 2  1.086950  2.528770 -5.799790  -3.321690 -3.55036 -7.34971  -1.050960
## 3  0.767803  0.559735  2.385130  -2.868020  4.38976 10.59250   5.077560
## 4 -7.096480 -4.754940 -3.892830 -10.667200 -3.24553 -2.24196 -10.083300
## 5  9.692490  4.236640 -4.310380  -1.038800  5.48001 -2.55295   5.165410
## 6  3.991450  0.350207  2.870570  -0.573346  2.13657  1.53847  -0.196955
##         PC76       PC77     PC78      PC79     PC80      PC81      PC82
## 1 -0.0129554  0.0505564 12.68150  1.601140 -7.33282  2.788560  7.703260
## 2 -5.2097800  7.5951400 10.35960 -0.291698  2.16434  0.296412 -0.934301
## 3 -0.4583840  0.0416165  2.64185 -1.717290  3.63620  0.214529 -7.629770
## 4 -6.5937600  7.4637200 -6.78586  6.868990  7.03817 -2.634720  5.319950
## 5  2.9004200  6.7934100 -8.50726 -7.055580 11.34130 -3.577970  1.304170
## 6 -2.2571200 -3.4150700 -2.61168  2.072780 -2.94126  0.272714 -2.538370
##        PC83      PC84       PC85     PC86      PC87        PC88     PC89
## 1 -9.866020  -4.31495  0.6777720 -5.51019 -6.924420  1.31937000 -6.27135
## 2  0.436573  -4.93466  2.8169900 -1.25202 -2.881640 -3.90148000 -5.87395
## 3  1.010660  -3.70488 -2.0284700 -3.73672 -2.211940  0.00993857 -3.52806
## 4  4.236790 -12.09670  0.0380884 -1.77589  4.614420  3.89719000  3.17119
## 5  7.152270  -8.92949 -1.0369200  8.31742 -5.384580 10.74410000  1.63387
## 6 -2.946150   1.43230  0.8272230 -1.73772  0.658895  5.02566000 -2.06215
##        PC90       PC91     PC92      PC93      PC94     PC95        PC96
## 1  1.145260 -10.156500 -3.13334 -1.783820 -3.588170 -4.35788   7.1361700
## 2 -1.608050   2.109830 -2.38463 -1.962430  2.246590 -4.25180  -4.6616200
## 3  3.092150  -2.204930 -3.23484 -0.249128  7.762050  4.78274   2.0568400
## 4  0.407724  -0.451571  6.58912 -4.365150 -8.650070 -1.14843  -8.2902300
## 5  6.507940   4.727920  5.91863  4.992710 -0.342017 -1.48456 -18.1406000
## 6 -1.103690   5.434020  2.28493 -0.614166 -1.243970 -4.61006   0.0451897
##      PC97      PC98      PC99    PC100      PC101     PC102    PC103    PC104
## 1 8.78241 -1.912010 -3.323490  4.93145 -10.537200  3.716780 -3.45889 -4.66733
## 2 2.91688 -1.175150  5.053420  3.14203  -6.980030 -3.292910  2.03871  1.98330
## 3 3.87105  0.136814 -3.648920 -1.23847   7.505590  2.325360  3.80764  5.79557
## 4 6.20959 -1.575340 10.112200 -1.60873  -4.491970  0.520230 -4.67215 -1.39170
## 5 6.59724 -2.519420 -2.018210 -2.76594  -1.344410 -5.595290  3.15003  4.51741
## 6 5.23598 -3.400290  0.370388  1.21478  -0.811618  0.276597 -1.02941  1.94314
##         PC105      PC106     PC107     PC108      PC109    PC110     PC111
## 1   3.6164400  -3.621440  3.778800 -1.881240  0.4265100 -3.20713  0.943682
## 2   5.5188800  -2.850750  0.756583  3.277790  0.2377210  3.06538 -0.598388
## 3   2.8100700   9.656480  3.418960  0.493882 -1.0775100  3.91338 -4.807900
## 4   6.7866000  -0.581409  4.729670 -4.434640 -0.0155062 -1.20471 11.342700
## 5 -17.8347000 -14.263700  4.946380  4.077720  0.6006550 -8.46081  2.171890
## 6  -0.0202892   2.111370 -4.683750 -0.244502 -0.4694190  3.42689  2.672000
##       PC112     PC113      PC114    PC115     PC116     PC117     PC118
## 1  4.650600  0.568530  5.8164100 -8.45628 -1.037270 -0.779554  1.309430
## 2  1.985000  0.853602  2.5881100 -2.08731 -2.665640  0.679625 -4.251210
## 3 -0.506207 -3.699900 -4.1380100 -2.26648  6.692060 -1.139400  2.083100
## 4 -7.456190  0.423242  4.1909500  1.79892 -3.504240  0.202567  2.130380
## 5 -1.378830 -7.304760 -0.0255998  4.65554  7.095080  4.593360 -3.398440
## 6  0.633097  2.289390 -4.2970600 -2.22632  0.232146  3.973770  0.172575
##      PC119     PC120      PC121    PC122    PC123      PC124      PC125
## 1 -6.83216 -0.238747 -2.3355200 -6.26133 -9.03430  1.0777400   1.102470
## 2 -1.40493 -5.859750 -0.6081220 -6.28296 -3.55222 -0.3966300   1.668450
## 3  1.48528 -0.877686  0.6227140  6.41087  2.76417  0.8582210   1.009050
## 4 15.06220 -2.202070 -2.6446100 -2.16875  1.00702 11.1506000  -5.773470
## 5  3.61705  6.696050  0.0467786  1.19695  5.11210 11.1839000 -11.471500
## 6 -2.73901  0.487846  1.1426000  2.56311  2.86882 -0.0639249   0.696654
##       PC126    PC127    PC128    PC129    PC130     PC131      PC132
## 1 -3.264220  2.49648  2.71035 -8.39245  5.29770 -0.604751  0.0319948
## 2 -1.615450 -1.42929 -2.63967  4.49418  1.57293  1.928240 -2.9189900
## 3  6.307040  3.51173  3.87306  1.50706 -1.24510  1.392200  3.1599800
## 4 -1.118350 -2.48191  5.86796  3.93594  1.18478 -2.317890 -6.7275100
## 5 -0.769771 -6.14800 -6.02020  1.90101 -1.86867 -9.847430  1.9986900
## 6 -1.714860  1.23419 -4.09984  1.65576 -2.69937  2.469880  2.5896300
##         PC133     PC134     PC135     PC136     PC137    PC138     PC139
## 1 -0.00476579  5.396020  2.611530 -0.328788  0.175361 -4.91154 -2.362130
## 2 -4.04194000 -1.379200  0.117704  4.871980 -2.606570  1.76225 -3.363510
## 3  0.41838800  0.375410  4.759850  2.854450 -4.663850  2.03150  5.358980
## 4 -1.30709000  4.628990 -1.042330 -5.263840 -1.519600  3.19468  0.663090
## 5 11.21110000  1.863470 -1.545080  0.652939  1.649230 -3.61138 -0.299006
## 6 -0.21671300  0.743186  2.001340  1.809520 -0.341460 -5.32700  0.773553
##       PC140    PC141     PC142     PC143     PC144      PC145    PC146
## 1 -6.396970 -1.67544  2.895390  2.192780  4.327200  -5.081270 -3.47263
## 2  3.239580 -5.49091  5.059210 -8.234230 -0.621355   0.906740 -1.06434
## 3  4.073650  3.54339 -1.613140  7.888170  5.158350  -0.368051 -1.46049
## 4  1.624300 -0.96298  5.114330  0.541375 -9.324060   4.952700 -4.09967
## 5  1.563700  1.84025  2.954910 -5.051490 -8.961600 -10.829700  4.28655
## 6  0.315918  2.70990  0.907912 -1.555190 -0.523389   0.646551  1.10398
##        PC147     PC148      PC149    PC150     PC151    PC152      PC153
## 1 -7.4019400 -0.085435  2.1186700  2.42665  0.850691  6.17052  4.9082400
## 2 -6.1629700  1.479820 -6.0286100  5.07042 -1.528910  1.71706  1.7883000
## 3 -0.6345720 -0.779144  0.0375354 -4.07633  1.652520 -1.62459 -1.2402500
## 4 -1.2463100  2.824750  5.6019900  2.88964  0.326503  3.09846  5.8434700
## 5  1.9688900  5.211240 -0.1552900 -3.11523 -3.405690 -7.51049 -9.3838200
## 6  0.0573525  3.712280 -1.5083900  1.30896  0.358595 -3.18367 -0.0334206
##      PC154     PC155     PC156    PC157     PC158      PC159    PC160     PC161
## 1 -1.69769 12.751200 -1.343380  7.72398 -5.795900 -10.400700 -3.71151 -4.729100
## 2 -3.39364  6.417370 -5.319130  1.00849  2.560790   7.208550 -7.04635  4.689130
## 3 -1.40850  0.903666 -7.892730  3.00922 -3.642500   4.311480  7.35473 -0.321652
## 4 -2.54236 -1.151910  2.195310  3.45469  0.929473  -2.629330  7.35077 -4.724400
## 5  1.26893 -1.233260  1.084270 -7.30525  6.647590  -0.758297 -8.51190 -0.609865
## 6 -4.09577 -0.159922  0.233928  1.69582  4.161510   5.042550  2.35631 -1.284000
##       PC162     PC163      PC164      PC165     PC166    PC167    PC168
## 1 -5.447390  2.201770 -0.0248024  -9.807610 -8.331380  2.93591  1.68750
## 2  2.198770 -3.485170  3.1277400 -10.353700  2.718670  2.76854 -3.49151
## 3  1.414800  1.948990 -1.3431700   1.076840  6.111980 -5.86223  4.33017
## 4 -1.014150 -4.131350  5.3142300  -2.862470 -6.931190  6.11905  1.52432
## 5 -4.819830  0.327660 -8.3835400   9.995780 -0.727753  2.26051 -4.25010
## 6  0.864983 -0.301886  0.9082320  -0.218509  4.348150 -1.98231  1.49800
##      PC169     PC170     PC171     PC172     PC173    PC174     PC175     PC176
## 1 -3.30902  6.371800  7.503040 -4.195200 11.637700  6.88266  0.837633  8.369330
## 2  4.53768  3.450040 -2.112630 -2.179480  0.109162 -1.93880 -5.196740 -4.097020
## 3  2.69640 -2.306310  6.697740  4.062030 -1.173660  4.34290 -2.586690 -4.394000
## 4 -2.10771 -3.178930 -3.468040 -2.929220 -1.594560  5.58657 -0.110492  0.460886
## 5 -7.87725 -0.359289 -8.879580 -0.999904 10.144000 -6.52697 -0.201275  1.116080
## 6 -0.17832 -2.886290  0.131796 -2.520810 -3.283840  0.51122 -0.870987  0.438179
##       PC177     PC178     PC179     PC180     PC181    PC182     PC183    PC184
## 1 -10.78100  5.979880  0.306682 -2.936590  4.932730  1.75031 -0.583489  1.14002
## 2   4.27266  0.121533 -7.721210  2.301580 -0.538046  2.58522  2.494530  1.62119
## 3   3.18118 -1.431240  1.373830  5.654720  2.734180 -4.02361 -0.945256 -1.33957
## 4  -1.63536  9.507150 -2.250480  0.297913 -4.907280  1.42801  1.772740 -6.59006
## 5  -5.69393  5.592520  2.322930 -5.613610  7.529220 -4.21566  3.385570  4.78116
## 6   2.47028  4.040490  0.961758 -1.334240  6.868490  2.88158 -1.996070 -3.50053
##       PC185     PC186      PC187     PC188     PC189      PC190     PC191
## 1  3.133300  3.199240 -4.3345700  0.682097  5.680720  1.5806700 -0.962661
## 2  2.885220 -0.103630 -4.1233500  4.916050  1.006000 10.3081000  3.801520
## 3 -6.638540  3.729430  1.1629700 -0.875242 -8.670040 -0.0632216 -0.411967
## 4  0.178439  0.341242 -0.7702740  5.564390 -5.282250 -3.1110000  0.207438
## 5 -1.714870 -4.521330  0.8891020 -7.905830 -0.191294 -7.9036900  0.200192
## 6  0.901250 -1.863460 -0.0184047  1.450870 -0.657083  2.2024500  0.887188
##       PC192    PC193     PC194     PC195    PC196    PC197     PC198     PC199
## 1  7.061020 -8.33520 -0.302678  0.647908 -3.35035 -8.25312  8.361610 -6.014980
## 2 -5.814570  4.67226  2.321640 -6.180880 -1.99485 -6.17516  1.487010 -9.180200
## 3  1.495610  3.27276 -8.704910  5.037340  9.93779 -2.37767  2.042930  2.851730
## 4  0.794471 -8.16763  0.454237  6.753870 -1.08893  5.28204  5.740390 -1.778770
## 5  2.372750 -1.10369 13.818600  5.244370  2.67536  2.80470 -5.927800  7.338330
## 6 -2.665490  6.94193 -0.591069 -5.780620 -1.84512 -2.18502  0.484737  0.690277
##       PC200     PC201     PC202     PC203    PC204    PC205     PC206    PC207
## 1 -6.931610 -4.586820 -4.564240  2.126820  3.10813  3.53048  8.388060  7.67917
## 2 -1.576450  1.869260 -0.937083  1.492150 -5.71744  6.39860  4.218390 -1.81072
## 3  1.233270 -6.941700 -3.707590 -6.891040 -2.04340 -7.37748 -2.193030 -3.10757
## 4  1.209880 -2.477590 -1.762960  0.835563 -0.21097  1.39658  0.392192  2.91430
## 5  4.525660  0.686656  0.375180  4.172130 -1.76877 -3.61801  6.061070 -5.40620
## 6  0.097467 -3.098260  5.006010 -0.805526 -2.75955 -3.50576  0.277770 -1.02450
##      PC208     PC209     PC210     PC211     PC212      PC213    PC214    PC215
## 1 -7.96133  1.617330 -2.793220  0.711674  4.924160  9.7029700 -5.84028 -2.61507
## 2  9.81179  7.525790  5.887680  4.568250  3.662950  6.1630100  5.38136  0.53293
## 3 -6.73944  0.461928  0.121859 -2.041100 -2.615150 -0.0967062  2.54300 -2.28002
## 4 -4.03631  2.789960 -2.504020  8.738590  5.403160 -3.6337300  7.38863  8.21427
## 5  8.72548 -6.731350 -8.032340 -1.955300 -0.959485 -6.8370900 -8.42257 -7.74669
## 6  1.84056  1.573760  3.382260 -0.328414 -1.186070  0.8472400  2.57702  1.19381
##      PC216     PC217       PC218     PC219      PC220     PC221     PC222
## 1 -4.05649 -1.400540  -5.2599400  11.66840 -11.990400  -2.59632 -4.280710
## 2  7.24621 -0.947783   2.4520900  -3.50296  -0.797493 -14.54210 -8.040200
## 3 -5.33901  3.356270  -0.0312963 -11.05110   2.600810   4.51269  0.935934
## 4  6.30084 -2.807530   7.0508700   5.31521  -6.573740   2.61616  1.336970
## 5 13.14020 -1.417840 -13.5184000  -1.07235  -4.257080   4.63861 -0.014990
## 6  1.13016 -5.947810   2.5275500  -0.87560   2.747300   5.81640  2.835400
##       PC223     PC224      PC225    PC226     PC227    PC228       PC229
## 1   1.94251  5.891960  -0.449854  3.92675  2.794570 11.87960  6.13497000
## 2  -1.11754  3.872850   8.925680 -8.55261  9.333840 -1.28196 10.33650000
## 3  11.25670  1.421700 -11.232400  3.01496  6.800130  4.60103 -1.33927000
## 4  -2.83960  4.244050   6.280090  5.59610  1.874810 -1.72194 -5.03377000
## 5 -12.46080 -5.103430   3.664240 -7.43294 -6.226510  1.61341 -1.64098000
## 6  -1.21133  0.921029  -1.758570 -1.27298 -0.214518 -2.59506 -0.00348635
##       PC230     PC231    PC232      PC233     PC234      PC235    PC236
## 1  -2.80267  -7.83308  8.85412  9.1812000 -1.254760  6.2623100 16.05450
## 2  19.19680 -11.41270 -1.09810 -5.5716400 -1.405510  3.7062700 -7.10752
## 3  -8.82161   4.70603  7.07786 -0.0711724  1.705160 -4.2771400 -3.51692
## 4   9.41425   9.06451  7.58435 -5.4198800 -1.647100  5.5584300  3.19031
## 5 -10.67500  -3.73103  4.58325 11.7961000 -6.267800 -0.0855602 -6.33932
## 6  -2.50674   2.88390  5.19936  0.1536130 -0.485274  0.0751213 -2.45290
##        PC237     PC238     PC239     PC240     PC241     PC242     PC243
## 1 -1.8641200 -0.431303 -7.267520 -9.430690 -9.013110 10.631200 -1.764860
## 2  1.6818200 -3.470010  0.157354 -1.640930 -3.601660 -0.992202 -1.892720
## 3 -0.0640565 -5.376040 -7.815560 -7.395830 -2.348790  1.930630  0.963152
## 4  7.0473000 -5.052070 -1.325840 -1.970020 -0.931347 -5.650630  6.926360
## 5 11.8571000  0.748320 -6.559410  1.255750  2.173390  1.652140  6.927640
## 6 -2.9325900 -2.290630 -7.875850 -0.830932 -4.696940  3.204330  0.256871
##        PC244    PC245    PC246      PC247    PC248     PC249     PC250
## 1 -11.902600  1.91390 -1.37661  15.236400  4.64691 -0.287894   2.80942
## 2  -0.269483  8.59781 -7.95790 -24.016300 -2.45026 -6.486090 -10.06590
## 3   6.695980 -5.20846 -4.87823  -3.626420 -3.16881  6.738720   5.11283
## 4  12.496000 -4.74525  8.57286  -3.619320 -8.54000  7.208910  -7.90051
## 5  -0.745293  2.93907  1.85637  -1.097090  1.36290 -1.547450  -2.38768
## 6   3.414920 -4.00801 -3.18730  -0.303565 -3.15522  3.801380  -2.45670
##       PC251      PC252     PC253      PC254      PC255     PC256     PC257
## 1  18.34910   6.021330  -1.75432  14.983700   4.642250   8.69091 -0.669913
## 2 -16.37770 -12.183600  12.46670  -5.029640 -12.460200 -20.54960  1.190570
## 3  -2.60577   6.802720   0.21410   6.770780  -4.476870   3.22644 -1.509920
## 4  -9.78929  15.557500 -15.83700 -12.138300  -3.103950   7.62455 -3.339510
## 5  -2.49837   0.984903   3.65400   3.357680   0.206083  -4.79476  3.970740
## 6  -3.27229  -0.948397   4.13363   0.582239  -0.279581   3.39691  0.180836
##      PC258      PC259     PC260    PC261      PC262    PC263     PC264
## 1 -3.30789 -2.3865600  3.374870 2.667370   4.798790  0.62135  2.703780
## 2  2.79875  0.1673550 -7.790910 2.164710  -0.518439 -4.98390 -0.884782
## 3  5.23692 -0.0362197 -4.863860 4.267020  -4.548640  5.31758 -1.741960
## 4 -2.43300 -1.6650700 15.606300 2.924460 -11.917900 -0.21808  1.082820
## 5  5.09175  1.2463300 -4.538360 1.354460  -1.329800 -1.58370 -2.255690
## 6 -1.13881 -1.3954600  0.408745 0.716955  -0.532828 -1.33913  0.801797
##       PC265      PC266     PC267      PC268     PC269     PC270       PC271
## 1  1.649910 -0.0200100  2.177510 -3.3099700  1.992740  1.927030   1.1373600
## 2  5.105700  1.8963400 -7.360250  0.7860970  1.227000 -4.218570   3.3524400
## 3 -6.865580  6.7687000 12.324800  0.4335670  0.266546  1.411440 -10.1173000
## 4 -3.427430 -5.1275700  1.426160  0.0933183 -2.021640  4.516900  -0.0350495
## 5  2.392090  0.0928426  2.462270 -0.1195320 -0.392513  1.111330  -0.7518710
## 6 -0.170483  1.4088800 -0.299909 -0.7428170  2.715280 -0.955953   0.7243900
##       PC272    PC273     PC274     PC275     PC276     PC277     PC278
## 1 -1.275740 -2.48432   5.20039 -2.861720  1.715530 -1.039670 -3.764230
## 2 -0.739302  1.69314   7.49680 -3.132360  1.161560  0.920478 -3.508810
## 3 10.122500 11.51660 -22.89080  4.621690 -2.174330 -8.311050 13.397500
## 4  0.863329  2.40725  -1.75043  2.406890 -0.203735 -1.697610  3.169310
## 5  3.764930  6.72967  -4.72925  0.524955 -1.068460 -1.648630  1.862390
## 6 -0.965874 -1.33428   1.68967  0.438172 -0.258886 -1.323900 -0.575133
##       PC279     PC280      PC281     PC282      PC283      PC284     PC285
## 1  3.872290  2.653390  0.9812440  1.369250  0.9536940 -1.4131500  1.296170
## 2  0.762351 -1.768900  0.0420279  0.807682 -0.3029830 -0.7922200  0.970225
## 3 -7.084490 -0.756776  1.0913600 -1.590870 -0.9827410  4.7706700 -9.088740
## 4 -3.343070 -1.896350  0.4949320 -1.056020  0.3940330  1.2254800 -0.808598
## 5 -0.774658  1.337520 -3.7078600  1.433060 -0.0421016  0.0227108 -1.267880
## 6 -1.375490 -0.560864  2.6851600 -2.156240  0.7733240  0.0510981  0.533246
##       PC286     PC287     PC288     PC289      PC290    PC291      PC292
## 1  1.408590 -0.692493 -1.692940 -0.508341  1.3155600  1.09893 -0.0469909
## 2  1.358240 -1.295590 -0.961377 -0.888247 -2.6110500 -1.79874 -0.3693740
## 3 -0.883345  0.656211  1.236220 -1.339480  0.9317510  2.41135  1.4819400
## 4  0.618072  2.211330  0.284783  0.238092  0.0885846 -1.28587  1.2646700
## 5  0.156034 -0.429487  0.074908  1.610000  0.3284930  1.19665 -1.4438900
## 6  0.568228  0.591674 -1.437410  0.723332 -0.5755580  0.31937 -1.0081300
##        PC293     PC294      PC295      PC296       PC297     PC298     PC299
## 1 -0.4883560  0.182678 -0.1851750 -0.2663890 -0.00320062  0.205618 -0.698171
## 2 -0.6974270  1.594630  0.5744470 -0.6161530  0.09362940 -1.101430  1.076890
## 3  2.9971100 -2.384980  0.0578765 -1.0670000 -0.27161200  0.949054 -0.522237
## 4 -0.0437913 -1.023070 -0.6816160  0.5671030  0.20787300 -0.917020 -1.890380
## 5 -0.9559760  1.207940  0.2997290  0.9107150  0.27095500  0.473866  1.609970
## 6 -1.4407400 -0.192525  1.6442300 -0.0874211  1.08679000  0.187795 -0.667890
##        PC300      PC301     PC302     PC303      PC304      PC305      PC306
## 1  0.4137050  0.5621150 -0.485694  0.257417 -0.2055540  0.1367620 -0.2688380
## 2 -0.6001580  0.6394160 -0.149942 -0.583650  1.2917800 -0.5095320  0.5222870
## 3  0.5338530 -0.3134630  1.039260 -0.670045  0.2227220  0.4046490 -0.9773220
## 4  0.0682799 -0.0738791 -0.316639  0.165937  0.2978430  0.0403902 -0.0669559
## 5 -0.1535990 -0.6139670 -0.632838  0.220250 -0.0752629 -0.7223190  0.2742670
## 6  0.8562820 -1.9606300 -0.304926  0.285378  0.0572905  0.7771180 -4.8020200
##       PC307     PC308     PC309     PC310     PC311     PC312      PC313
## 1 -0.258987 -0.179910  0.336991 -0.382592  0.025427 -0.147654  0.0352362
## 2 -0.765504 -1.110230 -1.812130 -0.340148 -0.294537 -0.184055 -0.0485052
## 3  0.308184  0.203520 -0.519026 -0.142370 -1.055930  0.441421 -0.1585510
## 4  0.382680  0.193674  0.826433  0.736911  0.221377  0.174860  0.1945740
## 5  0.269982  0.224447 -0.584473 -0.540086  0.811890 -0.276770 -0.1835570
## 6 -2.782440 10.372100 31.352700 -0.486031  7.850800 -0.217323 -0.3897350
##         PC314 Individual Pop_City Location Latitude Longitude Continent Year
## 1 1.29438e-06        801   Durres  Albania 41.29704  19.50373    Europe 2018
## 2 1.29438e-06        802   Durres  Albania 41.29704  19.50373    Europe 2018
## 3 1.29438e-06        803   Durres  Albania 41.29704  19.50373    Europe 2018
## 4 1.29438e-06        804   Durres  Albania 41.29704  19.50373    Europe 2018
## 5 1.29438e-06        805   Durres  Albania 41.29704  19.50373    Europe 2018
## 6 1.29438e-06        806   Durres  Albania 41.29704  19.50373    Europe 2018
##            Region   Subregion order order2 orderold
## 1 Southern Europe East Europe    33     25       25
## 2 Southern Europe East Europe    33     25       25
## 3 Southern Europe East Europe    33     25       25
## 4 Southern Europe East Europe    33     25       25
## 5 Southern Europe East Europe    33     25       25
## 6 Southern Europe East Europe    33     25       25

6.2.2 Create PCA plots for euro_native2_albania_croatia_greece_US

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_albania_croatia_greece_US_all_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.2.3 PC1 and PC3 for Albania, Croatia, Greece, US

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_albania_croatia_greece_US_all_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

6.3 PCA for Eastern Europe SNP Set 3 (MAF 1%, R2<0.01 snp set)

6.3.1 Import the data for SNP Set 3 subset for native_far_east_euro2

genotype <- here(
   "euro_global/output/neuroadmixture/native_far_east_euro2.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 347
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 347
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KER KLP 
##  12  10  12  12  12   2  12  12   4   7  12  11   4   2   6  12  11   6  12   4 
## KRA KUN LAM MAT OKI QNC RAR SEV SOC SON SSK SUF SUU TAI TIK UTS YUN 
##  12   4   9  12  12  11  12  12  12   3  12   6   6   7  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   338
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   338
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
## 
## 
##  - number of detected individuals:   338
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.lfmm"

PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          338
##         -L (number of loci)                 22537
##         -K (number of principal components) 338
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            native_far_east_euro2.pca/ 
## input file:                      native_far_east_euro2.lfmm 
## eigenvalue file:                 native_far_east_euro2.eigenvalues 
## eigenvector file:                native_far_east_euro2.eigenvectors 
## standard deviation file:         native_far_east_euro2.sdev 
## projection file:                 native_far_east_euro2.projections 
## pcaProject file:                   native_far_east_euro2.pcaProject 
## number of individuals:           338 
## number of loci:                  22537 
## number of principal components:  338 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          338
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro.rds"))
head(sampling_loc)
##             Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Sevastopol, Crimea Ukraine 44.54125  33.51400    Europe          SEV 2021
## 2            Alushta Ukraine 44.68289  34.40368    Europe          ALU 2021
## 3      Kerch, Crimea Ukraine 45.35246  36.47015    Europe          KER 2021
## 4          Krasnodar  Russia 44.95504  39.02782    Europe          KRA 2017
## 5              Sochi  Russia 43.60042  39.74533    Europe          SOC 2021
## 6         Tikhoretsk  Russia 45.85460  40.12560    Europe          TIK 2021
##           Region   Subregion order order2 orderold
## 1 Eastern Europe East Europe    42     34       34
## 2 Eastern Europe East Europe    43     35       35
## 3 Eastern Europe East Europe    44     36       36
## 4 Eastern Europe East Europe    45     37       37
## 5 Eastern Europe East Europe    46     38       38
## 6 Eastern Europe East Europe    47     39       39

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 37 Levels: ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 37

Check the regions

unique(sampling_loc$Region)
## [1] "Eastern Europe" "East Asia"      "South Asia"     "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1        PC2       PC3       PC4       PC5      PC6
## 1        ALU 20.4856  -0.305611 -2.550140  -7.28735 -24.20470  6.38975
## 2        ALU 25.3823   4.337260 -0.388604  -5.43395 -20.36060  8.95141
## 3        ALU 20.0237   2.626390 -0.786929 -10.38780 -29.93820  8.93216
## 4        ALU 25.8712  -3.342340 -1.110070  -2.41729   2.61012 -3.53675
## 5        ALU 30.0675 -11.041700  0.724509  -4.64010  -5.47538  9.77449
## 6        ALU 27.3772  -2.688700  0.959256  -1.42843  -1.32056 -4.38881
##         PC7     PC8       PC9    PC10     PC11      PC12      PC13      PC14
## 1 -15.04960 6.67978  0.151167 38.2655 -6.15882 -8.821650 -24.86590  -3.87666
## 2 -12.67250 4.17658 -0.154637 31.6130 -3.83496 -1.228910  -8.24218   3.89523
## 3 -19.74160 8.25617  1.521170 44.7494 -1.77796 -3.928850 -15.47780   4.22217
## 4   6.09960 4.60456  0.107691 31.9499  6.25326  0.350372   5.77296  12.58190
## 5  -7.78187 2.21053  0.984531 24.2537  9.42648 -2.278880   1.63767 -27.95230
## 6   2.18008 1.82266  0.788612 27.3708 -2.65027 -0.994772  -4.55721 -17.14900
##       PC15      PC16      PC17      PC18       PC19     PC20     PC21      PC22
## 1 -6.17760 14.824700  -9.01089   7.14638  13.722300  8.52422  7.00553  -3.77419
## 2 -4.40492 -0.229220  -9.98679  -5.77431  -4.806080 17.98710 19.70860  -9.18956
## 3 -6.60865  5.091250 -17.67700   2.63932   0.991916 10.78830 11.84630 -11.81950
## 4 -5.58323 -3.184320 -18.61840   1.67256  -4.746580 14.92690 17.16900  -7.24129
## 5 -2.96934 -8.900110  21.58010 -29.60820 -15.574500  1.08231 -5.34274   8.00136
## 6 -1.06223 -0.968945  13.54250 -20.00430 -14.981500  5.18956  1.51858  10.53780
##        PC23      PC24        PC25      PC26     PC27      PC28        PC29
## 1  -1.45222 -3.332710  -4.2012100  -9.21728  4.76234 -0.891124   5.2582300
## 2 -11.60540 15.355200  -4.0634100  -1.13517  1.33338 14.433700  -3.7651200
## 3  -9.34223  8.623670  -3.7577200  -6.51587  5.63725  6.830240   0.0136676
## 4  -6.96422  9.800260 -10.1645000 -13.08660 12.53640 -2.196250  -8.5672800
## 5   7.71842 -9.136000   0.0228503  -9.11044 10.12690 -2.903270   1.2629200
## 6   3.21686  0.968363  -5.4992600  -2.00759  8.71201 -3.956690 -10.9915000
##        PC30       PC31     PC32      PC33      PC34      PC35       PC36
## 1 -11.54270   0.105329 -5.48342 -6.168920 -2.445510   2.38026 -18.168900
## 2  -5.08792  -3.830590 -3.33753  4.723430 -2.689220   4.57595   6.566870
## 3 -10.27700   1.422650 -7.00011 -6.931120 -5.608190  11.83910  -6.569350
## 4  -4.63713  13.878400 -7.89427 -0.547714 -4.742410   7.84252  -6.930040
## 5 -17.04900 -11.385000  1.81386  2.792030  3.853960 -13.36160   3.102920
## 6  -1.97113  11.571500  3.23825  1.899260 -0.426233  -4.60796   0.468608
##       PC37     PC38      PC39       PC40      PC41      PC42        PC43
## 1 0.638639  5.96370  1.638110  -2.702090  26.01240  2.872620  -6.5748000
## 2 2.467160 -0.13457  2.271750  -0.695783 -25.92740  3.957250   9.0044100
## 3 2.511160 -1.24808  0.509379   6.589810  -1.63697  4.799590  -0.0233594
## 4 2.121700 -1.87570  1.823560   2.841060   2.42338  0.418901 -11.3373000
## 5 2.344010 -3.17460  7.938730   0.777851  -1.85059 -5.698850  14.5095000
## 6 5.034230  4.93773 10.499000 -15.493800   3.24924  5.950570  14.8428000
##        PC44       PC45     PC46     PC47      PC48    PC49     PC50        PC51
## 1  16.89850   1.788520  5.62376  8.75830  7.335620 3.93678 -7.88436 -0.00527219
## 2 -13.97630   0.418266 -4.15255 -5.67872 -9.702130 5.36903  8.58153  6.94867000
## 3   2.98640  -1.076080  1.37242 -1.21517 -0.602785 2.66775 -3.25599  8.65494000
## 4   5.15674   3.924090  4.79590  4.03105 10.211600 1.34531 -9.18752  2.72248000
## 5  -1.09257  -2.114320 -2.75617  3.44852  0.473827 0.79640 -6.17943  3.01706000
## 6  -1.87532 -11.292000 -3.75054  6.02935  6.197380 3.29759 -3.60343 -0.55713100
##        PC52     PC53       PC54      PC55      PC56      PC57       PC58
## 1  2.340900 12.99150   8.848370  -1.60283 -20.37070 -0.376401  11.458700
## 2 -4.383410 -9.85162 -11.181800   3.11030  26.42630 -1.008880 -18.740200
## 3 -6.471730 -0.49460  -0.560385   4.99467  -1.27009  1.606140   0.764857
## 4 -1.318270 -3.21074   0.506548   1.69150  -9.63551 -1.613160  -6.314400
## 5  0.859159  5.97654   2.418050  -2.52355  -5.83637  3.143980   7.944640
## 6  4.578160  1.21431   6.930270 -10.54760   3.27519  8.432160   9.756380
##        PC59     PC60      PC61     PC62     PC63      PC64      PC65
## 1 12.856000 -3.76451  3.046620  3.70025 -1.03461  1.963410 -4.801280
## 2 -4.022410 11.09840 -2.703890 -2.78526  6.80457 -1.793960  1.971330
## 3 -6.825280  1.63018  3.468500 -2.68255 -3.66434  7.675350 -1.717530
## 4  2.388530 -8.23049  1.187890 13.60900 -8.75254 -7.880010  8.460010
## 5  7.744390  3.05013  1.542050 -6.87117  9.60592 -1.401980 -7.486110
## 6 -0.982531 -1.11583  0.763478 10.81760 16.40710 -0.705895 -0.986492
##          PC66        PC67      PC68       PC69      PC70      PC71      PC72
## 1   1.1013800   6.9687800  0.677785 -14.196900  1.932510 -2.730120  0.852579
## 2   0.0759335 -13.2050000  1.579390  17.570800  0.696840 -5.388440  1.549200
## 3   9.6270600  -0.2945390 -3.152270  -0.185804 -1.788710  2.772100 -1.140440
## 4 -19.0307000   0.0222812 13.416700  -9.458960  3.892460 -5.184720 -5.684060
## 5  -6.0241000   4.3416200  2.450220   0.687401  0.928587 -0.543575 -1.444860
## 6 -13.5157000   2.9042000 19.716400  -6.014460 -3.473160 -3.559860 -1.434370
##         PC73      PC74      PC75     PC76      PC77     PC78      PC79     PC80
## 1 -0.0272235  0.442396 -0.734162  1.60670   8.34790 -1.57096 -2.325790 -2.66391
## 2  3.7382100  3.371150  1.717450  2.09539 -12.74000 -1.72072  4.329370 -2.44019
## 3 -3.2626400 -3.607260 -3.840580 -2.75547   6.71369  1.34046  0.637695  2.15170
## 4  0.7048390 -3.747130  1.718730  4.18192  -3.58733  4.14767 -4.507960 -5.19651
## 5 -5.9070400 -4.062740 -4.659250 -1.10407   3.31766  2.18593 -6.823190 -4.00661
## 6  6.0514700 -4.166360 -4.344890 -7.77497   4.97026 -1.38811 -3.049480  1.18698
##         PC81     PC82      PC83      PC84     PC85     PC86      PC87     PC88
## 1  -6.443170  1.83956  -4.31690  3.162300  3.88469  2.71503   1.53758  3.58843
## 2  -0.199465  3.38909  -2.99189  3.244740 -1.58022  3.20976   1.88266  6.84259
## 3  -1.699530 -3.79007   4.17300  7.029760  1.97388 -3.85027   4.20135 -6.80635
## 4 -18.767000  5.48438  -9.84728  5.582990  2.23714  4.16893 -10.36050  3.19901
## 5   9.240860 -1.70624   3.38226  2.525090 -3.98731  2.47006   4.15028 -8.34635
## 6  -6.014610 -8.22647 -13.41560 -0.858546 -3.22604  8.85529 -10.47330 -4.72417
##          PC89      PC90       PC91      PC92      PC93      PC94      PC95
## 1   2.9237700 -1.174850  -1.771270  2.770570  0.475811  2.639810  0.441089
## 2  -1.8059400  4.373540 -12.713400  0.889759 -1.552400 -0.109804  4.667540
## 3 -10.5783000 -6.958860  10.898300 -1.400270  0.583719 -0.996475 -1.975080
## 4   2.2166400 -0.260060  -6.463720  7.555240 -4.397560  0.371583 -2.671060
## 5   0.2608440 -5.346590   0.409559  1.481070 -2.083320  4.788510  3.257690
## 6   0.0814059 -0.702711  -8.225600  2.683760 -8.159570  2.908500  1.401030
##        PC96      PC97      PC98       PC99     PC100      PC101     PC102
## 1  1.871160 -5.345640  1.123030  3.8324100  4.429460   3.851840 -0.684432
## 2 -0.947024 -3.227110 -0.485948  1.7780700 -0.494879   2.156390  3.148300
## 3  0.877382  2.141940 -3.737950  1.3161900 -3.770210   1.886970 -1.763700
## 4  1.246670 -0.479822  0.900249 -7.1356500  1.164820   1.714810  1.542460
## 5 -4.474370  3.739800 -0.491935  0.0339354  1.290120   0.387799 -0.703330
## 6 -1.487730  8.175420  6.974200 -6.9411800  1.599830 -12.971100  5.138090
##      PC103    PC104     PC105      PC106    PC107    PC108     PC109     PC110
## 1  2.25127  1.52242  1.906900  0.3036580  2.60092 -0.43695  5.143180  0.670442
## 2  1.61509  2.50924 -0.372587  1.5369000 -5.25799  1.23955  2.456150  4.775800
## 3 -5.46988 -1.75623  1.980140  0.9970790  1.69452 -8.97879  0.148475 -3.022270
## 4 -3.20834 -2.54959  0.729329  0.1442540  1.59751  4.16221 -3.662630  3.966310
## 5 -1.75826 -3.16704  3.781260 -0.2276980  2.71692 -2.47770  3.644330  3.318670
## 6 -0.15957 -1.73146  4.174510 -0.0224256  2.94150  5.84921  1.235730 -1.103880
##       PC111     PC112     PC113     PC114     PC115     PC116      PC117
## 1  0.233544  1.491500 -2.291850   2.75724  4.884290 -5.246880  -0.804267
## 2 -2.612870  1.070640  3.629040  -1.37871 -4.767060 -0.517355  -5.131480
## 3  1.307540  0.255413 -0.929420   3.97817  1.340020  3.841250   2.142500
## 4 -5.926810 -4.381290  0.525513  -4.22072  8.663280 -0.735895 -11.334500
## 5  6.378110  1.025500  4.854340   3.27895 -3.870160 -5.524730   8.700350
## 6 -1.616950 -5.279780 -2.855310 -11.38320 -0.243223 -3.058180  -7.078780
##      PC118     PC119     PC120     PC121     PC122     PC123    PC124    PC125
## 1 -2.10701  3.333620 -2.215670  1.317990 -1.266080  0.190247 -3.89487 -2.80235
## 2 -4.94447  5.394510 -0.399604  0.336776 -0.395207  3.196520 -4.65527  2.98828
## 3 11.90080 -3.439220  3.095730 -0.587585  2.156610  1.273720 -3.38204 -2.57005
## 4 -5.42009  3.225020 -5.246600  3.407710 -5.493770 -5.074700  3.36216  3.98967
## 5  3.73444 -5.657550  1.020350  1.996580  1.535220  1.972160 -2.88932 -8.43176
## 6 -2.36509  0.662008 -8.206090  4.289750  2.361380 -5.375370  3.63467  5.10431
##       PC126     PC127     PC128    PC129     PC130     PC131    PC132     PC133
## 1  3.471030  3.016630 -3.925310 -2.14323 -2.687760 -0.433259  3.58807  1.525600
## 2 -4.326420  3.231160 -0.718613 -1.38307 -0.170312 -0.374122 -4.82892  2.103600
## 3 -3.125980 -0.896061 -1.163370  1.41482  0.493408  1.832260  3.01200  0.647162
## 4  0.802205 -4.291240 -5.576690  0.97485  1.256250 -1.065740  7.41296  0.175257
## 5 -4.921190  0.742270 -5.490360 -1.96823  0.305370  3.665320 -5.13845 -2.557740
## 6 -6.691420  3.812800 -0.800976  5.76665 -2.657830  2.180710  1.69836 -8.661120
##       PC134    PC135     PC136     PC137    PC138    PC139    PC140     PC141
## 1  4.204800  3.50766 -3.622220 -3.061660  4.27550 -1.23560 -5.24803 -0.523196
## 2  2.249160  7.33844 -0.255614 -5.768620  5.60840 -1.66117 -2.77480  0.705456
## 3 -0.632887 -2.77590 -2.518710  0.322666 -5.57880  5.30426  3.46215 -3.504200
## 4 -8.758330  1.69746  4.803670  0.639076  6.57224 -2.78041 -9.07109  1.242930
## 5 -3.477590 -3.41453 -6.135030 -3.890220  1.14685  3.50326  3.22089 -4.223740
## 6  0.188458  6.80750  4.682540 -2.105050 -4.82354  1.69971  3.74137 -2.142900
##       PC142     PC143    PC144     PC145      PC146     PC147     PC148
## 1 -0.173200 -0.775339  4.46472 -1.424750 -2.2181000  2.490170  1.419960
## 2  5.600500 -6.012330  5.26934 -3.267700  0.0852875  1.049250 -1.768620
## 3  0.320618  1.420100 -7.04487  0.232887  3.3146700 -3.616300  1.685780
## 4 -1.607800  0.241212 -4.46744  2.599630  0.9280960  1.541200 -0.483154
## 5  0.563423  4.325520  1.73424 -4.996600 -7.8645100 -0.139164  2.314240
## 6  6.806240  4.354110  2.02088  2.259160  2.8227200 -7.372750  0.467339
##       PC149     PC150      PC151     PC152      PC153    PC154     PC155
## 1 -0.751374 -0.314859  4.9189600  2.832320  2.1644200 -0.48708  5.633100
## 2 -3.457890  1.122780  4.4779900  3.029150  0.5865970  2.21855  5.408460
## 3  1.304000 -0.849583 -5.3868500 -3.694100 -1.4307600  0.99073 -2.478430
## 4 -0.648384  1.500540 -0.0454821 -0.899994 -0.9112840  3.99710  2.946110
## 5  4.085970 -1.058240  2.1960000  1.563200 -0.8587100 -2.03031 -0.114413
## 6  4.018140 -3.663050 -6.9023600  1.565750 -0.0364479 -1.12643 -3.045680
##      PC156     PC157     PC158     PC159        PC160    PC161     PC162
## 1 -3.40933  4.245140 -1.608310 -0.170535  2.250260000  3.06643 -0.852598
## 2  2.57965  1.008830 -1.634390  3.641350 -0.684657000  1.74680 -1.512600
## 3 -4.95370  4.642520  3.691120 -1.763100 -4.002720000  4.10108  1.779000
## 4  2.82666 -4.000820  9.446800  0.366259 -0.306979000 -1.30854 -1.802620
## 5 -5.92844  3.509700  0.381809  2.169450  0.179172000 -4.47060  2.325670
## 6  3.92859  0.675016 -2.718180  1.907780  0.000285673 -8.03710  0.267860
##       PC163     PC164     PC165     PC166    PC167     PC168    PC169    PC170
## 1  0.582339 -2.580390  2.534640 -1.589210 -1.59853 -1.455500 -4.33631 -3.95844
## 2 -0.592519 -3.621410  2.543370 -1.163350  1.89278 -1.634220 -5.58807  1.23806
## 3  1.663760  1.286180 -2.124800 -0.953216 -2.25028  3.720880 -4.73797 -0.96331
## 4  0.554564 -0.670156  0.685410  4.931360 -8.06053 -0.302732  7.43252 -3.31588
## 5  1.366430 -0.520266 -0.703254  2.255250 -5.59305 -5.713120  2.88964 -2.07597
## 6 -3.714100  9.020530  5.422160  4.142060  5.22376 -0.119157  4.82586  3.03351
##        PC171     PC172     PC173     PC174     PC175     PC176     PC177
## 1  0.3138550 -2.836640  0.581467 -3.716190 -1.784550  1.922010  0.736131
## 2  2.8229900  1.280020 -2.240140 -1.526570 -1.140210  0.987885  0.272200
## 3 -0.0935415 -0.850212  2.103960  7.079490 -2.267310 -1.072840 -4.739330
## 4  2.0464400 -3.026580  2.425940 -0.111941  0.515358 -0.360149  4.501240
## 5  2.5353400 -1.658530 -0.194528 -4.949810  4.951410 -3.588460  0.845839
## 6  5.3982000  7.035450 -0.577793  1.208870  1.451360  4.278480  1.147550
##       PC178     PC179    PC180    PC181     PC182     PC183       PC184
## 1 -0.245713 -7.739400 -2.80283 5.160200 -5.933070  4.642320  -2.3001200
## 2  0.489924 -1.649790 -3.27892 3.705430  0.475018 -0.281622   3.6696600
## 3 -2.571620 -3.262760  2.11674 0.566958  5.369940  0.913404 -10.4730000
## 4 -2.499130  5.848700 -1.91250 0.993972 -3.598870  3.904920  -1.2258800
## 5 -1.163900 -0.547669 -2.98600 4.095320  2.077450  2.955180  -0.0954974
## 6 -2.227390  9.930780 -3.43060 5.058650  3.444780 11.927500  -0.9931580
##        PC185    PC186     PC187      PC188      PC189     PC190      PC191
## 1 -1.8608100 -3.57918 -3.912450 -2.7491800 -2.5978300 -1.807430 -1.8084200
## 2 -1.8806100 -7.12260  0.209279  0.0102187 -0.0604702  4.360960  5.1693100
## 3 -7.1409200  1.70281 -1.111460 -7.1552800 -8.6599500 -0.900483 -1.0899900
## 4  6.2324500  4.95887 -5.534910 -2.1408900  6.2462800 -2.671090 -6.8505500
## 5  0.0824617  2.49476 -1.652400 -3.6944000 -1.7281500 -1.053770  0.5330990
## 6 -4.4923500  5.50138 10.743400 -1.3303000  1.5984800  0.940630  0.0717802
##       PC192     PC193     PC194     PC195     PC196     PC197     PC198
## 1  4.392910 -0.704383  0.127557  2.069700  0.267326  2.343840 -6.774000
## 2  4.154560  2.998550  0.741238  2.082670  2.141780 -0.735199 -2.112400
## 3 -3.583830 -0.872622  1.375060 -0.841222 -7.582140 -3.181530 -3.944950
## 4 -2.731750  4.570590 -6.040680  3.671770 -6.331720 -2.819110  9.198310
## 5  0.814798  4.779060 -4.684740  1.950760  2.227170 -0.463965 -0.632437
## 6 -6.477250  1.733180  9.695080  1.744510 -1.948060 -4.347590 -4.955840
##      PC199      PC200     PC201     PC202     PC203     PC204      PC205
## 1  2.29855 -0.0840596 -0.290231  5.129110 -1.081720 -2.044860 -0.0636947
## 2 -1.01843  1.9971400 -1.360590  4.351050  0.039855 -1.017150  0.0704090
## 3 -4.91483 -8.5438900  2.165530 -3.273990 -3.107690 -1.767540  0.7227850
## 4  6.95400  2.8321700  0.960941 -0.693739  2.248740 -0.873033  6.2040300
## 5  2.81415 -1.6188800  3.698710  3.380350 -9.481560 -5.930260 -4.7194000
## 6 -4.46817  3.0213600 -4.903340  0.566249  9.049290 -8.506660  4.4522700
##        PC206    PC207     PC208      PC209     PC210     PC211     PC212
## 1  1.5565900  4.89563   9.70141   3.520910   5.68518 -0.588393  1.056990
## 2  0.8222620  4.83075   2.18860   2.087710  -2.66309 -2.121430  1.516630
## 3  0.0512091 10.63600  11.86430  14.413100  -7.77049  4.691010  0.630596
## 4  3.4318200 -2.31269  -2.76345 -12.579300  11.45120  2.536890 10.807200
## 5 -0.9745060  8.45072   7.45257  -0.122306  15.12800 -0.265452  1.483020
## 6  9.0050700 -6.74781 -10.43120   1.213550 -12.14130  1.414730 -7.572490
##       PC213    PC214     PC215     PC216   PC217     PC218     PC219      PC220
## 1 -1.266960 -1.60985 -0.455257  1.231620 1.37431 -4.960800  -4.40612  6.5877900
## 2 -0.481193 -3.71090  1.183000 -1.255400 2.45358 -2.561620  -1.60094  3.2417100
## 3  3.097420 -1.23464  4.302810  4.391370 7.53760  4.076680 -10.18920  5.0080000
## 4 -7.962850 -3.56571  2.817200  0.285720 4.46929 -4.205260   1.14108 -5.4300800
## 5 -7.528000 -3.75026  8.194150  1.091220 2.87153 -6.945880   1.55729 -3.4248800
## 6  0.178464  6.24746 -8.234460 -0.268105 2.09840  0.779583 -17.71250 -0.0259676
##      PC221      PC222    PC223     PC224    PC225     PC226     PC227
## 1 -2.49333   1.295260  1.77904 -1.765170  1.51092  -6.89570  1.222100
## 2 -4.23876   4.800870 -1.94656 -2.462170  6.82174  -7.55609  0.722666
## 3  5.08803   7.371570  8.18448  0.267683  3.48268 -14.00320  0.621006
## 4  4.63688  -0.344474 -3.59394  7.851560 -4.77102   5.96996 -7.958300
## 5 -3.30376  10.818000 -5.81665 -0.492456  5.12095  -2.48280 -0.717461
## 6 -8.96617 -14.448400  1.28541 -1.475940 -5.50763  -9.50137 -5.830390
##        PC228    PC229     PC230     PC231     PC232     PC233    PC234
## 1  -0.291177  5.38652   5.04529   3.02889  6.859150  1.555080  1.29869
## 2  -2.053150 -5.98924   3.81262   4.17189  0.124972  3.060280 -1.89391
## 3 -15.958000 -7.04024   2.50946   5.35528  0.410173  1.464870  2.39490
## 4   0.578263  5.83464 -13.33220 -10.07140 -3.934080  3.461750 -1.71238
## 5  -4.859340  7.17837 -17.11510   2.30003  8.734620 -5.527440  2.22191
## 6   5.254240 -1.82583   4.63786  12.23260 -3.380450 -0.478726  4.62158
##        PC235    PC236    PC237      PC238    PC239    PC240    PC241     PC242
## 1   1.268790  4.52818 -7.90247   1.263610  4.09049  6.95967 -5.75503 -3.350460
## 2  -2.088440 -1.19242  5.23539  -3.943150  2.10397  4.34058 -2.17436 -3.132580
## 3 -12.433200  4.77106  8.64175 -15.126500 -2.63310  3.34238  3.77514  8.087270
## 4   2.793900 -7.70442 14.41080  -3.873750 -1.45488 -2.44353 10.76130  3.397560
## 5  -0.870791 -1.30531 -8.18779  -0.964891 -2.22363 10.75230  1.02944  0.560609
## 6  -8.529220 -6.23071 -4.26087   2.978340 -5.84498 -3.58001 -7.57907  3.112510
##       PC243      PC244    PC245    PC246     PC247    PC248     PC249     PC250
## 1 -1.887980   3.048360  8.25138  2.96888  -2.74788 -7.25497  0.606981  2.748020
## 2 -0.467864   0.751218 -2.37102 -2.16277  -3.27506  4.45175 -2.053020 -1.088070
## 3 -5.055780 -16.320700 -6.31010  7.57656   4.43318  9.95462 -1.544290 -1.869050
## 4  4.911020   2.182850  8.67330 -1.86469 -11.33880  3.70551  1.268100  0.207395
## 5 -4.010240   4.846260  8.99997 -9.64692  -4.28301 -6.92391 -5.193560  5.790190
## 6  0.317515   0.249875 -8.37567  7.74226   1.02684 -2.74028  4.764380 -1.612520
##      PC251    PC252    PC253      PC254    PC255    PC256     PC257    PC258
## 1  4.86447 -2.52755  2.38467  0.0448477 -5.31077  8.40696  8.242650  5.59847
## 2 -4.35830  5.40465  1.91873 -2.1237500  1.24516  4.74902 -0.557711 -1.64364
## 3  3.39060 -3.05126 -6.91127 -9.8591800  0.62905 -4.09576 -3.203680  3.44532
## 4 -1.63647 -5.42257  1.56657 -5.1332400 -8.49202 -9.78501 -7.576970 -1.69326
## 5  1.82744  1.09874  3.63232  0.8565510 -5.45035  8.13579 -5.024560  3.09718
## 6  3.90397  4.10101 -2.05782 -2.1190700  5.60654 -1.82609 -3.433010 -3.56565
##        PC259      PC260     PC261       PC262     PC263    PC264    PC265
## 1  9.3155000   0.719761  0.580734 -7.64357000  0.696658  4.39360  8.06374
## 2  3.4819200   4.315870  1.015440 -4.47027000  5.465670  4.00709 -6.48726
## 3  2.1263500  -0.517176  4.983100  5.55968000  0.179064 -6.60045 -2.56074
## 4 -5.2229300 -11.558400  2.654590  0.00190635 -1.453690 -5.30192 -4.26263
## 5 -6.4178500   5.448530  4.900950 -0.67934400  3.008140 -1.17734 -5.74475
## 6  0.0127738   4.882430 -1.211800  4.04374000  0.210157  1.39389  2.00698
##       PC266     PC267    PC268     PC269     PC270     PC271    PC272
## 1  0.180735  2.117720 -4.77674 -4.413830  9.333690 14.076300 -4.07322
## 2 -7.499210 -3.653030  2.66255 -0.555569 -4.079380 -2.101810  6.11986
## 3  3.790040  0.247629  3.81500  0.609312 -2.652120 -0.727738 -1.39410
## 4  1.938170 -0.620761  8.61369  3.525960 -1.878210 -7.486710  2.22378
## 5  3.740610 -2.155160 -4.02776  2.201080 -2.951990 -3.904070  1.04965
## 6 -0.316796 -3.881040 -2.55355 -2.415430 -0.699635  1.750140 -3.13271
##        PC273     PC274     PC275    PC276     PC277     PC278     PC279
## 1 -10.811000  0.208267  4.651730  1.16779 -4.812110 -3.322090  3.026150
## 2   5.981040  0.860304 -5.686120  5.86928  6.004530 -1.379920 -3.026130
## 3   2.620050 -5.757590 -1.667580 -2.79863 -0.380622 -4.231010  4.692490
## 4   3.807900 -5.598330  1.101140  3.98188 -1.783290 -1.552710  2.946290
## 5   0.375166  4.860810 -0.651275  1.82738  8.155100  0.560862 -6.258940
## 6   2.427860  4.076330  0.558768 -1.12187 -1.858940  4.047870  0.695363
##       PC280    PC281     PC282    PC283     PC284     PC285     PC286    PC287
## 1  4.499140 -8.17780  0.644040  3.70380 -3.647450 -7.788210  0.251713 -4.16450
## 2  4.382140 -4.68117  0.529465  1.19826  2.134550 -5.084610 -4.957800  5.29651
## 3 -0.228019  4.32578 -1.448660 -7.78725  7.675220  5.594250 -1.586220  1.07977
## 4  0.499336  4.76906 -4.674750  2.14105  1.589770 -0.186059  1.760220 -1.23835
## 5 -5.299180  1.91256  9.464840  5.74187 -6.064580  5.245830 -5.734360  2.64987
## 6  0.277668 -1.00595  3.460120 -4.49424  0.237973  3.234210 -0.301798  5.27515
##       PC288    PC289     PC290     PC291     PC292     PC293     PC294
## 1 -9.996610 13.12480  2.871160 -4.881800 15.220000  9.717160  1.721690
## 2  5.358210 -1.06213  0.404066 -3.354730  5.271610  1.987590 15.290100
## 3  0.973881 -5.44692 -4.967540  1.100860 -6.512470 -6.068240 -6.820920
## 4 -1.243610 -3.83314  1.145480 -0.172442 -2.360910  0.271403 -0.357951
## 5  7.786440 -6.88359  5.737540 -0.726949 -6.030380 -7.258960  2.350180
## 6 -0.371498  3.70075 -0.705509  1.010940 -0.938047  3.868570  3.402900
##        PC295     PC296     PC297       PC298     PC299     PC300     PC301
## 1 -3.4110900  1.686080   6.62375 -10.3170000  2.115910  0.321069  1.199770
## 2 -9.4855200  6.153120  17.61420  -7.2190300  7.198400 -6.962230 -5.653820
## 3  3.3814800 -4.143960  -5.74892   4.9096400 -0.910531 -0.102854 -0.283924
## 4 -3.0965900  0.839258  -4.59375  -0.0365626 -0.220522 -2.668270 -5.738610
## 5 -4.4456700 -0.248431 -11.56480   4.4448900  1.259200  0.233057  3.147440
## 6 -0.0810154 -1.332060  -1.32580   1.4609400  1.912250  1.705790  2.295310
##       PC302     PC303      PC304    PC305     PC306      PC307     PC308
## 1  1.574640 -3.410510 -2.8918700 -3.23672  1.482110 -1.8217600  1.745490
## 2  0.802838 -8.660020 -1.3383400 -8.27628  3.236250  7.9197700 -3.593290
## 3 -1.958730  4.446000  0.7113250  5.66868 -0.847999 -4.1450500  0.267413
## 4 -0.458483 -0.995096  0.0814797 -5.66699 -2.117740  1.3477800  0.150716
## 5 -5.632880 -1.385670  9.9214900 -1.62767 -4.083950  6.1395500 -1.262320
## 6 -0.970871  0.859766  0.2588410  3.55760  1.765990 -0.0887573 -2.060870
##       PC309     PC310     PC311     PC312     PC313     PC314     PC315
## 1 -3.918990 -0.167691 -1.184250  1.242380 -0.466074  0.353532 -2.221200
## 2  2.215080  2.951480 -1.747090 -1.625510 -1.582370 -2.256560  2.379820
## 3 -0.939395 -2.352220 -1.784810  4.128300 -2.899250  2.590090 -1.071720
## 4  3.228050 -0.940345  0.394989 -0.637514  0.110893  1.006150  0.615197
## 5  1.743200  5.374170  3.288830 -7.110870  0.716492 -0.481899  1.932740
## 6  0.246399 -1.196600  1.812030  1.838890  0.300388 -0.774491  0.605467
##       PC316     PC317    PC318     PC319     PC320      PC321     PC322
## 1  0.352821 -2.220830 -2.11519  2.158240  0.247180 -0.4446400 -0.835695
## 2 -0.496456 -2.474130 -4.76329  0.395585 -0.520601 -1.4199800 -3.187130
## 3  0.979855  0.753731  2.23401  0.406704 -0.827127 -0.4565480  0.968956
## 4 -0.418606 -0.105097 -2.62185  0.286816  1.054780 -0.0905316 -0.621044
## 5  0.302910 -2.030700  1.58847 -3.183170  3.812870  3.7652200 -2.353640
## 6 -0.886637  0.871873 -2.42999  2.415860 -0.816579 -0.9468930  2.103610
##       PC323     PC324     PC325     PC326      PC327     PC328      PC329
## 1  0.516407  0.357848 -1.338830  2.550150  1.7769500  0.640938  0.7280670
## 2  2.077730 -2.123860  1.529100  0.947236 -1.4292900 -1.574630 -0.2259680
## 3 -0.400698  1.644650 -0.458929 -0.531544 -1.1136200 -1.376580 -0.5154730
## 4  1.133430  1.358530 -0.616837  0.314308  0.0895645 -1.338060  1.1146800
## 5  3.226560 -3.635110  2.314460 -2.032180 -0.6470410  0.445676  0.0271391
## 6 -4.040430 -1.187590 -2.444880  0.737983 -2.7459600 -1.951170 -1.7823600
##        PC330     PC331      PC332     PC333     PC334      PC335      PC336
## 1 -1.5942800  0.514835 -0.2979850  0.770455  1.289120 -0.0675534  0.7056970
## 2 -1.0438600  0.757701 -1.5047800  3.134780  0.646038 -0.2487530 -0.3816210
## 3  0.0645953 -0.122472 -0.2345380 -0.565257 -0.396688 -0.3370070 -0.3057790
## 4 -0.6303430 -0.454786 -0.0135467  0.381199 -0.143390  0.1891330 -0.0823058
## 5 -0.0344383 -0.600860  0.1210370 -0.596788 -0.330062  0.1136310  0.5483740
## 6  1.0374000  0.138874  0.1367710  0.643809 -0.335264 -0.4544160 -0.2970510
##        PC337       PC338 Individual Pop_City Country Latitude Longitude
## 1 -0.1890310 1.22504e-06       1210  Alushta Ukraine 44.68289  34.40368
## 2  0.2466140 1.22504e-06       1209  Alushta Ukraine 44.68289  34.40368
## 3  0.1358210 1.22504e-06       1204  Alushta Ukraine 44.68289  34.40368
## 4  0.2101320 1.22504e-06       1211  Alushta Ukraine 44.68289  34.40368
## 5  0.0485408 1.22504e-06       1212  Alushta Ukraine 44.68289  34.40368
## 6  0.2411700 1.22504e-06       1213  Alushta Ukraine 44.68289  34.40368
##   Continent Year         Region   Subregion order order2 orderold
## 1    Europe 2021 Eastern Europe East Europe    43     35       35
## 2    Europe 2021 Eastern Europe East Europe    43     35       35
## 3    Europe 2021 Eastern Europe East Europe    43     35       35
## 4    Europe 2021 Eastern Europe East Europe    43     35       35
## 5    Europe 2021 Eastern Europe East Europe    43     35       35
## 6    Europe 2021 Eastern Europe East Europe    43     35       35

6.3.2 Create PCA plots for far eastern Europe

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_far_eastern_euro_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.3.3 PC1 & PC3 for far eastern Europe

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_far_eastern_euro_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.3.4 Re-do with individual pops named

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro_2.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro_2.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro_2.rds"))
head(sampling_loc)
##             Pop_City            Location Latitude Longitude Continent
## 1 Sevastopol, Crimea Sevastopol, Ukraine 44.54125  33.51400    Europe
## 2            Alushta    Alushta, Ukraine 44.68289  34.40368    Europe
## 3      Kerch, Crimea      Kerch, Ukraine 45.35246  36.47015    Europe
## 4          Krasnodar   Krasnodar, Russia 44.95504  39.02782    Europe
## 5              Sochi       Sochi, Russia 43.60042  39.74533    Europe
## 6         Tikhoretsk  Tikhoretsk, Russia 45.85460  40.12560    Europe
##   Abbreviation Year         Region   Subregion order order2 orderold
## 1          SEV 2021 Eastern Europe East Europe    42     34       34
## 2          ALU 2021 Eastern Europe East Europe    43     35       35
## 3          KER 2021 Eastern Europe East Europe    44     36       36
## 4          KRA 2017 Eastern Europe East Europe    45     37       37
## 5          SOC 2021 Eastern Europe East Europe    46     38       38
## 6          TIK 2021 Eastern Europe East Europe    47     39       39

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 37 Levels: ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 37

Check the regions

unique(sampling_loc$Region)
## [1] "Eastern Europe" "East Asia"      "South Asia"     "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1        PC2       PC3       PC4       PC5      PC6
## 1        ALU 20.4856  -0.305611 -2.550140  -7.28735 -24.20470  6.38975
## 2        ALU 25.3823   4.337260 -0.388604  -5.43395 -20.36060  8.95141
## 3        ALU 20.0237   2.626390 -0.786929 -10.38780 -29.93820  8.93216
## 4        ALU 25.8712  -3.342340 -1.110070  -2.41729   2.61012 -3.53675
## 5        ALU 30.0675 -11.041700  0.724509  -4.64010  -5.47538  9.77449
## 6        ALU 27.3772  -2.688700  0.959256  -1.42843  -1.32056 -4.38881
##         PC7     PC8       PC9    PC10     PC11      PC12      PC13      PC14
## 1 -15.04960 6.67978  0.151167 38.2655 -6.15882 -8.821650 -24.86590  -3.87666
## 2 -12.67250 4.17658 -0.154637 31.6130 -3.83496 -1.228910  -8.24218   3.89523
## 3 -19.74160 8.25617  1.521170 44.7494 -1.77796 -3.928850 -15.47780   4.22217
## 4   6.09960 4.60456  0.107691 31.9499  6.25326  0.350372   5.77296  12.58190
## 5  -7.78187 2.21053  0.984531 24.2537  9.42648 -2.278880   1.63767 -27.95230
## 6   2.18008 1.82266  0.788612 27.3708 -2.65027 -0.994772  -4.55721 -17.14900
##       PC15      PC16      PC17      PC18       PC19     PC20     PC21      PC22
## 1 -6.17760 14.824700  -9.01089   7.14638  13.722300  8.52422  7.00553  -3.77419
## 2 -4.40492 -0.229220  -9.98679  -5.77431  -4.806080 17.98710 19.70860  -9.18956
## 3 -6.60865  5.091250 -17.67700   2.63932   0.991916 10.78830 11.84630 -11.81950
## 4 -5.58323 -3.184320 -18.61840   1.67256  -4.746580 14.92690 17.16900  -7.24129
## 5 -2.96934 -8.900110  21.58010 -29.60820 -15.574500  1.08231 -5.34274   8.00136
## 6 -1.06223 -0.968945  13.54250 -20.00430 -14.981500  5.18956  1.51858  10.53780
##        PC23      PC24        PC25      PC26     PC27      PC28        PC29
## 1  -1.45222 -3.332710  -4.2012100  -9.21728  4.76234 -0.891124   5.2582300
## 2 -11.60540 15.355200  -4.0634100  -1.13517  1.33338 14.433700  -3.7651200
## 3  -9.34223  8.623670  -3.7577200  -6.51587  5.63725  6.830240   0.0136676
## 4  -6.96422  9.800260 -10.1645000 -13.08660 12.53640 -2.196250  -8.5672800
## 5   7.71842 -9.136000   0.0228503  -9.11044 10.12690 -2.903270   1.2629200
## 6   3.21686  0.968363  -5.4992600  -2.00759  8.71201 -3.956690 -10.9915000
##        PC30       PC31     PC32      PC33      PC34      PC35       PC36
## 1 -11.54270   0.105329 -5.48342 -6.168920 -2.445510   2.38026 -18.168900
## 2  -5.08792  -3.830590 -3.33753  4.723430 -2.689220   4.57595   6.566870
## 3 -10.27700   1.422650 -7.00011 -6.931120 -5.608190  11.83910  -6.569350
## 4  -4.63713  13.878400 -7.89427 -0.547714 -4.742410   7.84252  -6.930040
## 5 -17.04900 -11.385000  1.81386  2.792030  3.853960 -13.36160   3.102920
## 6  -1.97113  11.571500  3.23825  1.899260 -0.426233  -4.60796   0.468608
##       PC37     PC38      PC39       PC40      PC41      PC42        PC43
## 1 0.638639  5.96370  1.638110  -2.702090  26.01240  2.872620  -6.5748000
## 2 2.467160 -0.13457  2.271750  -0.695783 -25.92740  3.957250   9.0044100
## 3 2.511160 -1.24808  0.509379   6.589810  -1.63697  4.799590  -0.0233594
## 4 2.121700 -1.87570  1.823560   2.841060   2.42338  0.418901 -11.3373000
## 5 2.344010 -3.17460  7.938730   0.777851  -1.85059 -5.698850  14.5095000
## 6 5.034230  4.93773 10.499000 -15.493800   3.24924  5.950570  14.8428000
##        PC44       PC45     PC46     PC47      PC48    PC49     PC50        PC51
## 1  16.89850   1.788520  5.62376  8.75830  7.335620 3.93678 -7.88436 -0.00527219
## 2 -13.97630   0.418266 -4.15255 -5.67872 -9.702130 5.36903  8.58153  6.94867000
## 3   2.98640  -1.076080  1.37242 -1.21517 -0.602785 2.66775 -3.25599  8.65494000
## 4   5.15674   3.924090  4.79590  4.03105 10.211600 1.34531 -9.18752  2.72248000
## 5  -1.09257  -2.114320 -2.75617  3.44852  0.473827 0.79640 -6.17943  3.01706000
## 6  -1.87532 -11.292000 -3.75054  6.02935  6.197380 3.29759 -3.60343 -0.55713100
##        PC52     PC53       PC54      PC55      PC56      PC57       PC58
## 1  2.340900 12.99150   8.848370  -1.60283 -20.37070 -0.376401  11.458700
## 2 -4.383410 -9.85162 -11.181800   3.11030  26.42630 -1.008880 -18.740200
## 3 -6.471730 -0.49460  -0.560385   4.99467  -1.27009  1.606140   0.764857
## 4 -1.318270 -3.21074   0.506548   1.69150  -9.63551 -1.613160  -6.314400
## 5  0.859159  5.97654   2.418050  -2.52355  -5.83637  3.143980   7.944640
## 6  4.578160  1.21431   6.930270 -10.54760   3.27519  8.432160   9.756380
##        PC59     PC60      PC61     PC62     PC63      PC64      PC65
## 1 12.856000 -3.76451  3.046620  3.70025 -1.03461  1.963410 -4.801280
## 2 -4.022410 11.09840 -2.703890 -2.78526  6.80457 -1.793960  1.971330
## 3 -6.825280  1.63018  3.468500 -2.68255 -3.66434  7.675350 -1.717530
## 4  2.388530 -8.23049  1.187890 13.60900 -8.75254 -7.880010  8.460010
## 5  7.744390  3.05013  1.542050 -6.87117  9.60592 -1.401980 -7.486110
## 6 -0.982531 -1.11583  0.763478 10.81760 16.40710 -0.705895 -0.986492
##          PC66        PC67      PC68       PC69      PC70      PC71      PC72
## 1   1.1013800   6.9687800  0.677785 -14.196900  1.932510 -2.730120  0.852579
## 2   0.0759335 -13.2050000  1.579390  17.570800  0.696840 -5.388440  1.549200
## 3   9.6270600  -0.2945390 -3.152270  -0.185804 -1.788710  2.772100 -1.140440
## 4 -19.0307000   0.0222812 13.416700  -9.458960  3.892460 -5.184720 -5.684060
## 5  -6.0241000   4.3416200  2.450220   0.687401  0.928587 -0.543575 -1.444860
## 6 -13.5157000   2.9042000 19.716400  -6.014460 -3.473160 -3.559860 -1.434370
##         PC73      PC74      PC75     PC76      PC77     PC78      PC79     PC80
## 1 -0.0272235  0.442396 -0.734162  1.60670   8.34790 -1.57096 -2.325790 -2.66391
## 2  3.7382100  3.371150  1.717450  2.09539 -12.74000 -1.72072  4.329370 -2.44019
## 3 -3.2626400 -3.607260 -3.840580 -2.75547   6.71369  1.34046  0.637695  2.15170
## 4  0.7048390 -3.747130  1.718730  4.18192  -3.58733  4.14767 -4.507960 -5.19651
## 5 -5.9070400 -4.062740 -4.659250 -1.10407   3.31766  2.18593 -6.823190 -4.00661
## 6  6.0514700 -4.166360 -4.344890 -7.77497   4.97026 -1.38811 -3.049480  1.18698
##         PC81     PC82      PC83      PC84     PC85     PC86      PC87     PC88
## 1  -6.443170  1.83956  -4.31690  3.162300  3.88469  2.71503   1.53758  3.58843
## 2  -0.199465  3.38909  -2.99189  3.244740 -1.58022  3.20976   1.88266  6.84259
## 3  -1.699530 -3.79007   4.17300  7.029760  1.97388 -3.85027   4.20135 -6.80635
## 4 -18.767000  5.48438  -9.84728  5.582990  2.23714  4.16893 -10.36050  3.19901
## 5   9.240860 -1.70624   3.38226  2.525090 -3.98731  2.47006   4.15028 -8.34635
## 6  -6.014610 -8.22647 -13.41560 -0.858546 -3.22604  8.85529 -10.47330 -4.72417
##          PC89      PC90       PC91      PC92      PC93      PC94      PC95
## 1   2.9237700 -1.174850  -1.771270  2.770570  0.475811  2.639810  0.441089
## 2  -1.8059400  4.373540 -12.713400  0.889759 -1.552400 -0.109804  4.667540
## 3 -10.5783000 -6.958860  10.898300 -1.400270  0.583719 -0.996475 -1.975080
## 4   2.2166400 -0.260060  -6.463720  7.555240 -4.397560  0.371583 -2.671060
## 5   0.2608440 -5.346590   0.409559  1.481070 -2.083320  4.788510  3.257690
## 6   0.0814059 -0.702711  -8.225600  2.683760 -8.159570  2.908500  1.401030
##        PC96      PC97      PC98       PC99     PC100      PC101     PC102
## 1  1.871160 -5.345640  1.123030  3.8324100  4.429460   3.851840 -0.684432
## 2 -0.947024 -3.227110 -0.485948  1.7780700 -0.494879   2.156390  3.148300
## 3  0.877382  2.141940 -3.737950  1.3161900 -3.770210   1.886970 -1.763700
## 4  1.246670 -0.479822  0.900249 -7.1356500  1.164820   1.714810  1.542460
## 5 -4.474370  3.739800 -0.491935  0.0339354  1.290120   0.387799 -0.703330
## 6 -1.487730  8.175420  6.974200 -6.9411800  1.599830 -12.971100  5.138090
##      PC103    PC104     PC105      PC106    PC107    PC108     PC109     PC110
## 1  2.25127  1.52242  1.906900  0.3036580  2.60092 -0.43695  5.143180  0.670442
## 2  1.61509  2.50924 -0.372587  1.5369000 -5.25799  1.23955  2.456150  4.775800
## 3 -5.46988 -1.75623  1.980140  0.9970790  1.69452 -8.97879  0.148475 -3.022270
## 4 -3.20834 -2.54959  0.729329  0.1442540  1.59751  4.16221 -3.662630  3.966310
## 5 -1.75826 -3.16704  3.781260 -0.2276980  2.71692 -2.47770  3.644330  3.318670
## 6 -0.15957 -1.73146  4.174510 -0.0224256  2.94150  5.84921  1.235730 -1.103880
##       PC111     PC112     PC113     PC114     PC115     PC116      PC117
## 1  0.233544  1.491500 -2.291850   2.75724  4.884290 -5.246880  -0.804267
## 2 -2.612870  1.070640  3.629040  -1.37871 -4.767060 -0.517355  -5.131480
## 3  1.307540  0.255413 -0.929420   3.97817  1.340020  3.841250   2.142500
## 4 -5.926810 -4.381290  0.525513  -4.22072  8.663280 -0.735895 -11.334500
## 5  6.378110  1.025500  4.854340   3.27895 -3.870160 -5.524730   8.700350
## 6 -1.616950 -5.279780 -2.855310 -11.38320 -0.243223 -3.058180  -7.078780
##      PC118     PC119     PC120     PC121     PC122     PC123    PC124    PC125
## 1 -2.10701  3.333620 -2.215670  1.317990 -1.266080  0.190247 -3.89487 -2.80235
## 2 -4.94447  5.394510 -0.399604  0.336776 -0.395207  3.196520 -4.65527  2.98828
## 3 11.90080 -3.439220  3.095730 -0.587585  2.156610  1.273720 -3.38204 -2.57005
## 4 -5.42009  3.225020 -5.246600  3.407710 -5.493770 -5.074700  3.36216  3.98967
## 5  3.73444 -5.657550  1.020350  1.996580  1.535220  1.972160 -2.88932 -8.43176
## 6 -2.36509  0.662008 -8.206090  4.289750  2.361380 -5.375370  3.63467  5.10431
##       PC126     PC127     PC128    PC129     PC130     PC131    PC132     PC133
## 1  3.471030  3.016630 -3.925310 -2.14323 -2.687760 -0.433259  3.58807  1.525600
## 2 -4.326420  3.231160 -0.718613 -1.38307 -0.170312 -0.374122 -4.82892  2.103600
## 3 -3.125980 -0.896061 -1.163370  1.41482  0.493408  1.832260  3.01200  0.647162
## 4  0.802205 -4.291240 -5.576690  0.97485  1.256250 -1.065740  7.41296  0.175257
## 5 -4.921190  0.742270 -5.490360 -1.96823  0.305370  3.665320 -5.13845 -2.557740
## 6 -6.691420  3.812800 -0.800976  5.76665 -2.657830  2.180710  1.69836 -8.661120
##       PC134    PC135     PC136     PC137    PC138    PC139    PC140     PC141
## 1  4.204800  3.50766 -3.622220 -3.061660  4.27550 -1.23560 -5.24803 -0.523196
## 2  2.249160  7.33844 -0.255614 -5.768620  5.60840 -1.66117 -2.77480  0.705456
## 3 -0.632887 -2.77590 -2.518710  0.322666 -5.57880  5.30426  3.46215 -3.504200
## 4 -8.758330  1.69746  4.803670  0.639076  6.57224 -2.78041 -9.07109  1.242930
## 5 -3.477590 -3.41453 -6.135030 -3.890220  1.14685  3.50326  3.22089 -4.223740
## 6  0.188458  6.80750  4.682540 -2.105050 -4.82354  1.69971  3.74137 -2.142900
##       PC142     PC143    PC144     PC145      PC146     PC147     PC148
## 1 -0.173200 -0.775339  4.46472 -1.424750 -2.2181000  2.490170  1.419960
## 2  5.600500 -6.012330  5.26934 -3.267700  0.0852875  1.049250 -1.768620
## 3  0.320618  1.420100 -7.04487  0.232887  3.3146700 -3.616300  1.685780
## 4 -1.607800  0.241212 -4.46744  2.599630  0.9280960  1.541200 -0.483154
## 5  0.563423  4.325520  1.73424 -4.996600 -7.8645100 -0.139164  2.314240
## 6  6.806240  4.354110  2.02088  2.259160  2.8227200 -7.372750  0.467339
##       PC149     PC150      PC151     PC152      PC153    PC154     PC155
## 1 -0.751374 -0.314859  4.9189600  2.832320  2.1644200 -0.48708  5.633100
## 2 -3.457890  1.122780  4.4779900  3.029150  0.5865970  2.21855  5.408460
## 3  1.304000 -0.849583 -5.3868500 -3.694100 -1.4307600  0.99073 -2.478430
## 4 -0.648384  1.500540 -0.0454821 -0.899994 -0.9112840  3.99710  2.946110
## 5  4.085970 -1.058240  2.1960000  1.563200 -0.8587100 -2.03031 -0.114413
## 6  4.018140 -3.663050 -6.9023600  1.565750 -0.0364479 -1.12643 -3.045680
##      PC156     PC157     PC158     PC159        PC160    PC161     PC162
## 1 -3.40933  4.245140 -1.608310 -0.170535  2.250260000  3.06643 -0.852598
## 2  2.57965  1.008830 -1.634390  3.641350 -0.684657000  1.74680 -1.512600
## 3 -4.95370  4.642520  3.691120 -1.763100 -4.002720000  4.10108  1.779000
## 4  2.82666 -4.000820  9.446800  0.366259 -0.306979000 -1.30854 -1.802620
## 5 -5.92844  3.509700  0.381809  2.169450  0.179172000 -4.47060  2.325670
## 6  3.92859  0.675016 -2.718180  1.907780  0.000285673 -8.03710  0.267860
##       PC163     PC164     PC165     PC166    PC167     PC168    PC169    PC170
## 1  0.582339 -2.580390  2.534640 -1.589210 -1.59853 -1.455500 -4.33631 -3.95844
## 2 -0.592519 -3.621410  2.543370 -1.163350  1.89278 -1.634220 -5.58807  1.23806
## 3  1.663760  1.286180 -2.124800 -0.953216 -2.25028  3.720880 -4.73797 -0.96331
## 4  0.554564 -0.670156  0.685410  4.931360 -8.06053 -0.302732  7.43252 -3.31588
## 5  1.366430 -0.520266 -0.703254  2.255250 -5.59305 -5.713120  2.88964 -2.07597
## 6 -3.714100  9.020530  5.422160  4.142060  5.22376 -0.119157  4.82586  3.03351
##        PC171     PC172     PC173     PC174     PC175     PC176     PC177
## 1  0.3138550 -2.836640  0.581467 -3.716190 -1.784550  1.922010  0.736131
## 2  2.8229900  1.280020 -2.240140 -1.526570 -1.140210  0.987885  0.272200
## 3 -0.0935415 -0.850212  2.103960  7.079490 -2.267310 -1.072840 -4.739330
## 4  2.0464400 -3.026580  2.425940 -0.111941  0.515358 -0.360149  4.501240
## 5  2.5353400 -1.658530 -0.194528 -4.949810  4.951410 -3.588460  0.845839
## 6  5.3982000  7.035450 -0.577793  1.208870  1.451360  4.278480  1.147550
##       PC178     PC179    PC180    PC181     PC182     PC183       PC184
## 1 -0.245713 -7.739400 -2.80283 5.160200 -5.933070  4.642320  -2.3001200
## 2  0.489924 -1.649790 -3.27892 3.705430  0.475018 -0.281622   3.6696600
## 3 -2.571620 -3.262760  2.11674 0.566958  5.369940  0.913404 -10.4730000
## 4 -2.499130  5.848700 -1.91250 0.993972 -3.598870  3.904920  -1.2258800
## 5 -1.163900 -0.547669 -2.98600 4.095320  2.077450  2.955180  -0.0954974
## 6 -2.227390  9.930780 -3.43060 5.058650  3.444780 11.927500  -0.9931580
##        PC185    PC186     PC187      PC188      PC189     PC190      PC191
## 1 -1.8608100 -3.57918 -3.912450 -2.7491800 -2.5978300 -1.807430 -1.8084200
## 2 -1.8806100 -7.12260  0.209279  0.0102187 -0.0604702  4.360960  5.1693100
## 3 -7.1409200  1.70281 -1.111460 -7.1552800 -8.6599500 -0.900483 -1.0899900
## 4  6.2324500  4.95887 -5.534910 -2.1408900  6.2462800 -2.671090 -6.8505500
## 5  0.0824617  2.49476 -1.652400 -3.6944000 -1.7281500 -1.053770  0.5330990
## 6 -4.4923500  5.50138 10.743400 -1.3303000  1.5984800  0.940630  0.0717802
##       PC192     PC193     PC194     PC195     PC196     PC197     PC198
## 1  4.392910 -0.704383  0.127557  2.069700  0.267326  2.343840 -6.774000
## 2  4.154560  2.998550  0.741238  2.082670  2.141780 -0.735199 -2.112400
## 3 -3.583830 -0.872622  1.375060 -0.841222 -7.582140 -3.181530 -3.944950
## 4 -2.731750  4.570590 -6.040680  3.671770 -6.331720 -2.819110  9.198310
## 5  0.814798  4.779060 -4.684740  1.950760  2.227170 -0.463965 -0.632437
## 6 -6.477250  1.733180  9.695080  1.744510 -1.948060 -4.347590 -4.955840
##      PC199      PC200     PC201     PC202     PC203     PC204      PC205
## 1  2.29855 -0.0840596 -0.290231  5.129110 -1.081720 -2.044860 -0.0636947
## 2 -1.01843  1.9971400 -1.360590  4.351050  0.039855 -1.017150  0.0704090
## 3 -4.91483 -8.5438900  2.165530 -3.273990 -3.107690 -1.767540  0.7227850
## 4  6.95400  2.8321700  0.960941 -0.693739  2.248740 -0.873033  6.2040300
## 5  2.81415 -1.6188800  3.698710  3.380350 -9.481560 -5.930260 -4.7194000
## 6 -4.46817  3.0213600 -4.903340  0.566249  9.049290 -8.506660  4.4522700
##        PC206    PC207     PC208      PC209     PC210     PC211     PC212
## 1  1.5565900  4.89563   9.70141   3.520910   5.68518 -0.588393  1.056990
## 2  0.8222620  4.83075   2.18860   2.087710  -2.66309 -2.121430  1.516630
## 3  0.0512091 10.63600  11.86430  14.413100  -7.77049  4.691010  0.630596
## 4  3.4318200 -2.31269  -2.76345 -12.579300  11.45120  2.536890 10.807200
## 5 -0.9745060  8.45072   7.45257  -0.122306  15.12800 -0.265452  1.483020
## 6  9.0050700 -6.74781 -10.43120   1.213550 -12.14130  1.414730 -7.572490
##       PC213    PC214     PC215     PC216   PC217     PC218     PC219      PC220
## 1 -1.266960 -1.60985 -0.455257  1.231620 1.37431 -4.960800  -4.40612  6.5877900
## 2 -0.481193 -3.71090  1.183000 -1.255400 2.45358 -2.561620  -1.60094  3.2417100
## 3  3.097420 -1.23464  4.302810  4.391370 7.53760  4.076680 -10.18920  5.0080000
## 4 -7.962850 -3.56571  2.817200  0.285720 4.46929 -4.205260   1.14108 -5.4300800
## 5 -7.528000 -3.75026  8.194150  1.091220 2.87153 -6.945880   1.55729 -3.4248800
## 6  0.178464  6.24746 -8.234460 -0.268105 2.09840  0.779583 -17.71250 -0.0259676
##      PC221      PC222    PC223     PC224    PC225     PC226     PC227
## 1 -2.49333   1.295260  1.77904 -1.765170  1.51092  -6.89570  1.222100
## 2 -4.23876   4.800870 -1.94656 -2.462170  6.82174  -7.55609  0.722666
## 3  5.08803   7.371570  8.18448  0.267683  3.48268 -14.00320  0.621006
## 4  4.63688  -0.344474 -3.59394  7.851560 -4.77102   5.96996 -7.958300
## 5 -3.30376  10.818000 -5.81665 -0.492456  5.12095  -2.48280 -0.717461
## 6 -8.96617 -14.448400  1.28541 -1.475940 -5.50763  -9.50137 -5.830390
##        PC228    PC229     PC230     PC231     PC232     PC233    PC234
## 1  -0.291177  5.38652   5.04529   3.02889  6.859150  1.555080  1.29869
## 2  -2.053150 -5.98924   3.81262   4.17189  0.124972  3.060280 -1.89391
## 3 -15.958000 -7.04024   2.50946   5.35528  0.410173  1.464870  2.39490
## 4   0.578263  5.83464 -13.33220 -10.07140 -3.934080  3.461750 -1.71238
## 5  -4.859340  7.17837 -17.11510   2.30003  8.734620 -5.527440  2.22191
## 6   5.254240 -1.82583   4.63786  12.23260 -3.380450 -0.478726  4.62158
##        PC235    PC236    PC237      PC238    PC239    PC240    PC241     PC242
## 1   1.268790  4.52818 -7.90247   1.263610  4.09049  6.95967 -5.75503 -3.350460
## 2  -2.088440 -1.19242  5.23539  -3.943150  2.10397  4.34058 -2.17436 -3.132580
## 3 -12.433200  4.77106  8.64175 -15.126500 -2.63310  3.34238  3.77514  8.087270
## 4   2.793900 -7.70442 14.41080  -3.873750 -1.45488 -2.44353 10.76130  3.397560
## 5  -0.870791 -1.30531 -8.18779  -0.964891 -2.22363 10.75230  1.02944  0.560609
## 6  -8.529220 -6.23071 -4.26087   2.978340 -5.84498 -3.58001 -7.57907  3.112510
##       PC243      PC244    PC245    PC246     PC247    PC248     PC249     PC250
## 1 -1.887980   3.048360  8.25138  2.96888  -2.74788 -7.25497  0.606981  2.748020
## 2 -0.467864   0.751218 -2.37102 -2.16277  -3.27506  4.45175 -2.053020 -1.088070
## 3 -5.055780 -16.320700 -6.31010  7.57656   4.43318  9.95462 -1.544290 -1.869050
## 4  4.911020   2.182850  8.67330 -1.86469 -11.33880  3.70551  1.268100  0.207395
## 5 -4.010240   4.846260  8.99997 -9.64692  -4.28301 -6.92391 -5.193560  5.790190
## 6  0.317515   0.249875 -8.37567  7.74226   1.02684 -2.74028  4.764380 -1.612520
##      PC251    PC252    PC253      PC254    PC255    PC256     PC257    PC258
## 1  4.86447 -2.52755  2.38467  0.0448477 -5.31077  8.40696  8.242650  5.59847
## 2 -4.35830  5.40465  1.91873 -2.1237500  1.24516  4.74902 -0.557711 -1.64364
## 3  3.39060 -3.05126 -6.91127 -9.8591800  0.62905 -4.09576 -3.203680  3.44532
## 4 -1.63647 -5.42257  1.56657 -5.1332400 -8.49202 -9.78501 -7.576970 -1.69326
## 5  1.82744  1.09874  3.63232  0.8565510 -5.45035  8.13579 -5.024560  3.09718
## 6  3.90397  4.10101 -2.05782 -2.1190700  5.60654 -1.82609 -3.433010 -3.56565
##        PC259      PC260     PC261       PC262     PC263    PC264    PC265
## 1  9.3155000   0.719761  0.580734 -7.64357000  0.696658  4.39360  8.06374
## 2  3.4819200   4.315870  1.015440 -4.47027000  5.465670  4.00709 -6.48726
## 3  2.1263500  -0.517176  4.983100  5.55968000  0.179064 -6.60045 -2.56074
## 4 -5.2229300 -11.558400  2.654590  0.00190635 -1.453690 -5.30192 -4.26263
## 5 -6.4178500   5.448530  4.900950 -0.67934400  3.008140 -1.17734 -5.74475
## 6  0.0127738   4.882430 -1.211800  4.04374000  0.210157  1.39389  2.00698
##       PC266     PC267    PC268     PC269     PC270     PC271    PC272
## 1  0.180735  2.117720 -4.77674 -4.413830  9.333690 14.076300 -4.07322
## 2 -7.499210 -3.653030  2.66255 -0.555569 -4.079380 -2.101810  6.11986
## 3  3.790040  0.247629  3.81500  0.609312 -2.652120 -0.727738 -1.39410
## 4  1.938170 -0.620761  8.61369  3.525960 -1.878210 -7.486710  2.22378
## 5  3.740610 -2.155160 -4.02776  2.201080 -2.951990 -3.904070  1.04965
## 6 -0.316796 -3.881040 -2.55355 -2.415430 -0.699635  1.750140 -3.13271
##        PC273     PC274     PC275    PC276     PC277     PC278     PC279
## 1 -10.811000  0.208267  4.651730  1.16779 -4.812110 -3.322090  3.026150
## 2   5.981040  0.860304 -5.686120  5.86928  6.004530 -1.379920 -3.026130
## 3   2.620050 -5.757590 -1.667580 -2.79863 -0.380622 -4.231010  4.692490
## 4   3.807900 -5.598330  1.101140  3.98188 -1.783290 -1.552710  2.946290
## 5   0.375166  4.860810 -0.651275  1.82738  8.155100  0.560862 -6.258940
## 6   2.427860  4.076330  0.558768 -1.12187 -1.858940  4.047870  0.695363
##       PC280    PC281     PC282    PC283     PC284     PC285     PC286    PC287
## 1  4.499140 -8.17780  0.644040  3.70380 -3.647450 -7.788210  0.251713 -4.16450
## 2  4.382140 -4.68117  0.529465  1.19826  2.134550 -5.084610 -4.957800  5.29651
## 3 -0.228019  4.32578 -1.448660 -7.78725  7.675220  5.594250 -1.586220  1.07977
## 4  0.499336  4.76906 -4.674750  2.14105  1.589770 -0.186059  1.760220 -1.23835
## 5 -5.299180  1.91256  9.464840  5.74187 -6.064580  5.245830 -5.734360  2.64987
## 6  0.277668 -1.00595  3.460120 -4.49424  0.237973  3.234210 -0.301798  5.27515
##       PC288    PC289     PC290     PC291     PC292     PC293     PC294
## 1 -9.996610 13.12480  2.871160 -4.881800 15.220000  9.717160  1.721690
## 2  5.358210 -1.06213  0.404066 -3.354730  5.271610  1.987590 15.290100
## 3  0.973881 -5.44692 -4.967540  1.100860 -6.512470 -6.068240 -6.820920
## 4 -1.243610 -3.83314  1.145480 -0.172442 -2.360910  0.271403 -0.357951
## 5  7.786440 -6.88359  5.737540 -0.726949 -6.030380 -7.258960  2.350180
## 6 -0.371498  3.70075 -0.705509  1.010940 -0.938047  3.868570  3.402900
##        PC295     PC296     PC297       PC298     PC299     PC300     PC301
## 1 -3.4110900  1.686080   6.62375 -10.3170000  2.115910  0.321069  1.199770
## 2 -9.4855200  6.153120  17.61420  -7.2190300  7.198400 -6.962230 -5.653820
## 3  3.3814800 -4.143960  -5.74892   4.9096400 -0.910531 -0.102854 -0.283924
## 4 -3.0965900  0.839258  -4.59375  -0.0365626 -0.220522 -2.668270 -5.738610
## 5 -4.4456700 -0.248431 -11.56480   4.4448900  1.259200  0.233057  3.147440
## 6 -0.0810154 -1.332060  -1.32580   1.4609400  1.912250  1.705790  2.295310
##       PC302     PC303      PC304    PC305     PC306      PC307     PC308
## 1  1.574640 -3.410510 -2.8918700 -3.23672  1.482110 -1.8217600  1.745490
## 2  0.802838 -8.660020 -1.3383400 -8.27628  3.236250  7.9197700 -3.593290
## 3 -1.958730  4.446000  0.7113250  5.66868 -0.847999 -4.1450500  0.267413
## 4 -0.458483 -0.995096  0.0814797 -5.66699 -2.117740  1.3477800  0.150716
## 5 -5.632880 -1.385670  9.9214900 -1.62767 -4.083950  6.1395500 -1.262320
## 6 -0.970871  0.859766  0.2588410  3.55760  1.765990 -0.0887573 -2.060870
##       PC309     PC310     PC311     PC312     PC313     PC314     PC315
## 1 -3.918990 -0.167691 -1.184250  1.242380 -0.466074  0.353532 -2.221200
## 2  2.215080  2.951480 -1.747090 -1.625510 -1.582370 -2.256560  2.379820
## 3 -0.939395 -2.352220 -1.784810  4.128300 -2.899250  2.590090 -1.071720
## 4  3.228050 -0.940345  0.394989 -0.637514  0.110893  1.006150  0.615197
## 5  1.743200  5.374170  3.288830 -7.110870  0.716492 -0.481899  1.932740
## 6  0.246399 -1.196600  1.812030  1.838890  0.300388 -0.774491  0.605467
##       PC316     PC317    PC318     PC319     PC320      PC321     PC322
## 1  0.352821 -2.220830 -2.11519  2.158240  0.247180 -0.4446400 -0.835695
## 2 -0.496456 -2.474130 -4.76329  0.395585 -0.520601 -1.4199800 -3.187130
## 3  0.979855  0.753731  2.23401  0.406704 -0.827127 -0.4565480  0.968956
## 4 -0.418606 -0.105097 -2.62185  0.286816  1.054780 -0.0905316 -0.621044
## 5  0.302910 -2.030700  1.58847 -3.183170  3.812870  3.7652200 -2.353640
## 6 -0.886637  0.871873 -2.42999  2.415860 -0.816579 -0.9468930  2.103610
##       PC323     PC324     PC325     PC326      PC327     PC328      PC329
## 1  0.516407  0.357848 -1.338830  2.550150  1.7769500  0.640938  0.7280670
## 2  2.077730 -2.123860  1.529100  0.947236 -1.4292900 -1.574630 -0.2259680
## 3 -0.400698  1.644650 -0.458929 -0.531544 -1.1136200 -1.376580 -0.5154730
## 4  1.133430  1.358530 -0.616837  0.314308  0.0895645 -1.338060  1.1146800
## 5  3.226560 -3.635110  2.314460 -2.032180 -0.6470410  0.445676  0.0271391
## 6 -4.040430 -1.187590 -2.444880  0.737983 -2.7459600 -1.951170 -1.7823600
##        PC330     PC331      PC332     PC333     PC334      PC335      PC336
## 1 -1.5942800  0.514835 -0.2979850  0.770455  1.289120 -0.0675534  0.7056970
## 2 -1.0438600  0.757701 -1.5047800  3.134780  0.646038 -0.2487530 -0.3816210
## 3  0.0645953 -0.122472 -0.2345380 -0.565257 -0.396688 -0.3370070 -0.3057790
## 4 -0.6303430 -0.454786 -0.0135467  0.381199 -0.143390  0.1891330 -0.0823058
## 5 -0.0344383 -0.600860  0.1210370 -0.596788 -0.330062  0.1136310  0.5483740
## 6  1.0374000  0.138874  0.1367710  0.643809 -0.335264 -0.4544160 -0.2970510
##        PC337       PC338 Individual Pop_City         Location Latitude
## 1 -0.1890310 1.22504e-06       1210  Alushta Alushta, Ukraine 44.68289
## 2  0.2466140 1.22504e-06       1209  Alushta Alushta, Ukraine 44.68289
## 3  0.1358210 1.22504e-06       1204  Alushta Alushta, Ukraine 44.68289
## 4  0.2101320 1.22504e-06       1211  Alushta Alushta, Ukraine 44.68289
## 5  0.0485408 1.22504e-06       1212  Alushta Alushta, Ukraine 44.68289
## 6  0.2411700 1.22504e-06       1213  Alushta Alushta, Ukraine 44.68289
##   Longitude Continent Year         Region   Subregion order order2 orderold
## 1  34.40368    Europe 2021 Eastern Europe East Europe    43     35       35
## 2  34.40368    Europe 2021 Eastern Europe East Europe    43     35       35
## 3  34.40368    Europe 2021 Eastern Europe East Europe    43     35       35
## 4  34.40368    Europe 2021 Eastern Europe East Europe    43     35       35
## 5  34.40368    Europe 2021 Eastern Europe East Europe    43     35       35
## 6  34.40368    Europe 2021 Eastern Europe East Europe    43     35       35

6.3.5 Create PCA plots for far eastern Europe with pop names

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_far_eastern_euro_pops_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.3.6 PC1 & PC3 plot for far eastern Europe with pop names

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_far_eastern_euro_pops_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.4 PCA for Brazil & Sicily SNP Set 3 (MAF 1%, R2<0.01 snp set)

Create a new bed file (since we didn’t make one specifically for these pops before)

echo "BER
PAL
GRV
REC
SIC
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_sicily_and_Americas.txt

Training done with euro_native pops for all italian + native +US

cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_sicily_and_Americas.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/euro_native_sicily_Americas \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/euro_native_sicily_Americas.log 

100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.967089. 22537 variants and 287 people pass filters and QC.

6.4.1 Import the data for R2<0.01 subset for euro_native_sicily_Americas

genotype <- here(
   "euro_global/output/neuroadmixture/euro_native_sicily_Americas.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 296
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 296
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM 
##  12  12  12  12   2  12  12   4   7  12  11   4   2   6  12  11   6   4   4   9 
## MAT OKI PAL QNC REC SIC SON SSK SUF SUU TAI UTS YUN 
##  12  12  11  11  11   9   3  12   6   6   7  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   287
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   287
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.removed file, for more informations.
## 
## 
##  - number of detected individuals:   287
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.lfmm"

PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          287
##         -L (number of loci)                 22537
##         -K (number of principal components) 287
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            euro_native_sicily_Americas.pca/ 
## input file:                      euro_native_sicily_Americas.lfmm 
## eigenvalue file:                 euro_native_sicily_Americas.eigenvalues 
## eigenvector file:                euro_native_sicily_Americas.eigenvectors 
## standard deviation file:         euro_native_sicily_Americas.sdev 
## projection file:                 euro_native_sicily_Americas.projections 
## pcaProject file:                   euro_native_sicily_Americas.pcaProject 
## number of individuals:           287 
## number of loci:                  22537 
## number of principal components:  287 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          287
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_Brazil_euro.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_Brazil_euro.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_Brazil_euro.rds"))
head(sampling_loc)
##     Pop_City Country  Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ     USA  39.79081 -74.92910  Americas          BER 2018
## 2 Palm Beach     USA  26.70560 -80.03640  Americas          PAL 2018
## 3 Recife, PE  Brazil  -8.05882 -34.87810  Americas          REC 2017
## 4   Gravatai  Brazil -29.93760 -50.99070  Americas          GRV 2018
## 5    Sicilia   Italy  38.23294  15.55088    Europe          SIC 2016
## 6   Kanazawa   Japan  36.56100 136.65620      Asia          KAN 2008
##            Region   Subregion order order2 orderold
## 1   North America                 1     NA       75
## 2   North America                 3     NA       77
## 3   South America                 7     NA       81
## 4   South America                 8     NA       82
## 5 Southern Europe West Europe    27     19       19
## 6       East Asia                51     43       43

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 33 Levels: BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC KAG ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 33

Check the regions

unique(sampling_loc$Region)
## [1] "North America"   "South America"   "Southern Europe" "East Asia"      
## [5] "South Asia"      "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population      PC1      PC2      PC3       PC4      PC5      PC6     PC7
## 1        BEN -27.9868  -9.0873 -5.55891 0.3080150 -4.01840 1.950530 3.70341
## 2        BEN -28.9441 -10.9597 -5.01098 1.1825100 -4.97300 1.453670 5.16611
## 3        BEN -29.4994 -10.2542 -5.25078 0.9689040 -4.89600 0.549813 5.03626
## 4        BEN -28.9520 -11.0038 -5.93855 0.5300450 -3.26018 0.449869 4.76751
## 5        BEN -27.9604 -10.0257 -6.17960 3.0958500 -5.52483 0.450500 3.86448
## 6        BEN -28.1620 -11.0876 -6.79601 0.0490528 -3.87185 2.094940 7.01968
##       PC8     PC9      PC10     PC11      PC12     PC13      PC14       PC15
## 1 5.91813 4.78740  0.745305  8.20893 -1.780890 1.193630  0.231945  0.0533398
## 2 6.55756 4.62976 -0.216794  9.46454 -4.079890 1.136330 -2.297950 -0.3697570
## 3 6.03396 4.77860  0.518998 10.77310 -1.772330 2.734290 -4.288900 -0.0944688
## 4 5.96438 5.84500 -2.590170 10.34560 -3.115270 0.439902 -3.548310  0.0491362
## 5 5.87321 4.78631  0.691438 10.57550 -0.243535 2.036400 -1.867820 -0.3295010
## 6 5.44803 5.07241  1.652590 10.82090 -3.716390 0.736772 -1.851240  1.8900500
##        PC16       PC17      PC18     PC19      PC20      PC21     PC22
## 1 -1.266420 -2.0139300 -0.433046 -1.49173  0.665996 -2.234730 12.42980
## 2 -1.574040  0.0860914  0.888112 -1.02298 -0.102479 -0.672590 15.03790
## 3 -0.454693 -1.8951400  0.232981 -2.60175  0.250769 -1.729150  8.66984
## 4 -2.215180 -0.2608700  0.119347 -1.50181  1.808150  0.394653  8.53026
## 5  0.359386  0.5562120  0.725415 -3.65824 -2.890880 -1.201060 11.23110
## 6 -1.534290 -2.1531400  3.053970 -2.79129  2.101390 -0.727313 19.58400
##        PC23      PC24       PC25     PC26      PC27      PC28     PC29     PC30
## 1  0.761672 -0.430870  0.8843980 -3.25450  0.221034  1.355870  5.25490 12.78410
## 2 -0.123448 -1.577800  2.7291000 -7.15095  1.562620  0.900525  7.84708 15.08510
## 3  2.441130 -1.166160 -1.3492000 -4.77287  0.799582 -3.100520  7.30012  6.15201
## 4 -2.386990  0.335299 -0.5935370 -2.94495 -0.888812  1.604380  6.77380  6.80868
## 5  2.278470 -3.814050 -0.0269465 -3.42005  0.858828 -1.198720  7.75474  7.40810
## 6  4.278960 -6.874380 -0.0964525 -6.43008  0.698959 -1.633610 26.79140 17.88970
##        PC31      PC32       PC33     PC34    PC35      PC36     PC37       PC38
## 1 -0.973695  -7.17816 -1.2580800  5.45639 6.12411  1.844960 -0.72627 -3.9845500
## 2 -5.683380 -10.14920 -2.4827900  9.61246 2.69258 -1.401460  1.48955 -3.6300900
## 3 -1.281290  -3.98277 -1.7402000  7.08759 4.44985  2.863930  3.62264 -0.2127700
## 4  0.702757  -6.26064 -2.3531000  5.10378 2.14621  0.293538 -1.99579 -3.4623400
## 5 -1.515960  -1.97493  0.0676711  4.58086 6.38920 -2.295360  1.49959 -1.8260700
## 6 -3.443480 -15.79480  2.6084800 20.95110 9.83649  7.164940 19.11160  0.0241028
##         PC39      PC40     PC41     PC42        PC43      PC44      PC45
## 1   2.084780 -2.580950 -1.46229 -3.00743   0.8819480  0.821013  1.214560
## 2   3.015540  2.452000 -3.83215  1.06354  -2.4413300  0.370019 -2.565900
## 3   0.593476  0.106879 -4.82031 -1.30717  -3.8806500 -0.142078 -5.594520
## 4   2.503040 -1.946390 -1.89825 -4.44304   0.0180848 -1.933410  0.213887
## 5   0.192741 -0.438637 -2.21437 -3.17785   2.4504800 -0.391757  1.153320
## 6 -14.030000 -6.934650 -3.42846 -5.15883 -13.4982000 -2.794790 12.950700
##        PC46      PC47     PC48      PC49      PC50      PC51      PC52
## 1  1.731850  0.338732 -1.72754  -6.91383  1.611800  3.019810  -4.60060
## 2  0.942138 -4.192440  4.72635 -35.05700 -0.159692  3.503820 -15.10700
## 3  2.520570  0.169481 -5.61066  -7.63847  6.700730  0.712633   1.85182
## 4  2.225770  1.290450  2.18913 -12.07980 -0.383975  8.330670  -2.95165
## 5  0.717575 -2.566600  4.41802 -11.65780 -0.559683  1.512060  -2.33130
## 6 -4.136980  2.018580 -7.22710  43.38740 -9.305990 -9.569190  13.33710
##        PC53      PC54     PC55      PC56      PC57      PC58      PC59
## 1  1.301540  1.267390  7.40905  -3.21041  0.336023   3.80185  5.058700
## 2  4.597570  0.246998 -3.07987 -12.70330 -4.772730 -10.49040 -2.739270
## 3  6.071460 -1.507940 -2.09003   5.76525  0.054409  -5.56212  1.479390
## 4 -4.515890 -3.524390 -4.72082   6.47438  0.412994   9.66145  4.740800
## 5 -0.326511  0.388013 -5.55859  -2.38799 -1.279610   4.98030  0.259202
## 6 -5.910610 -0.353015  4.46964   1.62015 -2.233240  -4.84299 -4.363080
##        PC60      PC61     PC62     PC63       PC64      PC65       PC66
## 1 -1.838270 -0.110149 -8.01533 -1.04805  3.9015200 -6.403050   0.574366
## 2 12.417500 12.151800 13.89750 -4.64977 -0.7759550 -7.350880 -14.506300
## 3  1.571440 -4.368410 -9.85807 -1.26122 -3.2807800  4.514280   5.074280
## 4  0.846691 -2.129420 -4.80275  3.30404 -0.0934461  0.131633   3.005300
## 5 -0.923966 -1.596270 -4.75110  7.73719  0.2663180 -2.220510   3.099970
## 6 -2.201000  2.500710  8.43954  6.06728 -2.3796800 -0.612183  -4.167650
##       PC67      PC68       PC69      PC70     PC71      PC72     PC73      PC74
## 1  3.11166   7.06384   2.896360 -2.143830  7.90763 -0.919581 -9.54844  7.295890
## 2 -2.17393 -24.85840 -17.043700 -3.470070  1.39043 -9.937550  1.72217 -0.749985
## 3 -3.86727  10.38510  -0.182639 -7.325990  1.83799  5.435100  1.17335 -4.567540
## 4  6.03290   8.82299   3.478440 -4.369100 -3.11641  8.516430  1.48559  2.973250
## 5 -5.36433   6.41736   5.223680  4.939590 -1.12897  5.240490 -7.24477 -4.521190
## 6  5.41691  -7.45211  -2.709640  0.885797 -3.76283 -5.676240  2.79463 -0.300528
##         PC75       PC76      PC77      PC78     PC79      PC80      PC81
## 1 -12.518900 -12.002800   0.89719 -1.947470  5.60450  6.537620  7.360060
## 2  -1.637810   1.012620  -2.34214 -5.134770  1.13798 -0.665655  1.642230
## 3   1.613300   0.113959   1.61805  7.079900  3.23799 -0.641635 -2.350940
## 4  -3.075040   3.164620 -13.13410  1.603080 -8.72153 -3.105100  3.082540
## 5  -0.130613  -5.797960  -5.74377 -3.906230 -1.50849  1.554610 -4.931390
## 6  -2.424430  -1.399860   3.58868 -0.778464 -1.80604 -3.448510  0.154695
##        PC82     PC83      PC84      PC85      PC86     PC87     PC88       PC89
## 1 -5.367360 -4.00604 -7.329600  -4.19953  4.223190  8.26659  7.13485 -19.587100
## 2 -0.198981 -8.54468  4.860840   3.48384 -6.834450  2.00563 -1.04595   4.851030
## 3  0.421844  3.00855 -3.223160  -6.02851  7.984840 -6.36501  5.32659   4.553460
## 4  4.074450 10.47270 -0.273944  -5.55145 -3.133760 -4.46994 -6.40368  -7.740630
## 5 -1.618570 -2.29159 -8.813950 -11.84120 -2.246410 -3.93520 -6.05628  -8.451110
## 6 -2.894390  1.89023 -1.470960   2.86234  0.905114 -3.67370 -1.82196  -0.917497
##        PC90      PC91      PC92      PC93     PC94       PC95      PC96
## 1  -4.94073 -5.423260   1.54224  8.422590 -4.24749   4.108610 -5.626930
## 2   2.53255  4.885750  -1.47199  3.210320  3.30140   1.734530  4.495000
## 3   1.56661  0.678582   4.30504 -2.308270 -8.55437   0.345432  1.777310
## 4  -4.80391 -8.116790 -11.83820 -4.069290 11.74910 -12.530700 -0.670313
## 5 -10.88650 -4.118770   4.33815 -0.544696 -7.68970  -3.157390 -8.540630
## 6  -4.42027 -0.323588   2.88592 -2.998070  2.80336   2.547970  1.810400
##        PC97      PC98       PC99      PC100      PC101     PC102        PC103
## 1  4.930930 -0.725001 -7.0365300  0.2512860  -8.637080  0.775752  -3.31963000
## 2 -0.325766  1.759460  0.6433410  3.9173500  -5.630310  4.000010   0.00404051
## 3 -6.885840 -6.284380  1.6681600 -2.0952600   0.555313 -8.919030 -12.70480000
## 4 -4.775260  6.671960 -4.4419300  1.6787500 -12.835600  5.872480  13.07120000
## 5 -0.407980  3.778690  0.9572150 -2.7288600   7.319920 -2.905770   0.35116800
## 6  2.144340 -0.244524 -0.0807656  0.0877283   2.232350  2.678400  -2.36631000
##        PC104     PC105      PC106     PC107    PC108     PC109     PC110
## 1  -3.650890 10.365500  -0.110929 -5.179050  4.98707  2.065450  5.275890
## 2   6.632720 -2.665690   2.629080  0.344004 -7.90520 -2.711130 -1.770720
## 3  -6.874500  3.909680 -11.960800  2.307880 -1.23568 12.459000 -2.327750
## 4 -15.773100 13.539500  -6.509400 12.766100 -5.89163 -6.022990 -0.544698
## 5  -1.133730 -1.669730   6.815360  2.007520  5.99504 -4.309400 18.195300
## 6   0.937436  0.451469   1.582670 -1.641950 -3.37312  0.223767 -0.313039
##      PC111     PC112      PC113     PC114     PC115      PC116      PC117
## 1 -3.72262 -1.709440  -2.469530  7.388760  -9.91022 -1.4255400   8.345290
## 2  2.36369  2.119570   3.133310  0.555375   4.40154  0.2884900  -4.108450
## 3  2.59460 -5.405970 -18.265400 -0.543583  -7.95654 -8.7136600 -13.896900
## 4 -7.54909  8.035770  -1.318790 -3.488650  -7.59389 10.2802000  -2.675060
## 5  4.03111  5.095640  -1.588530  6.625040 -12.37840  2.0082100  10.941300
## 6 -1.03677  0.748991   0.429426  2.739340   1.38151 -0.0964276  -0.887215
##      PC118    PC119     PC120     PC121     PC122     PC123       PC124
## 1 -3.70027 -3.00961 -5.858970  5.553240 -4.930520  -6.60648 -12.5941000
## 2 -3.06922  3.05055 -2.976410  0.413382  4.508610  -3.52480  -0.0575154
## 3 10.10280  6.86568 -1.122790 10.506900  0.766919   5.33264  -4.6897800
## 4  0.59914  4.29264  4.891670  4.334770 -4.625190 -15.18510  -1.9201200
## 5  5.97099 -6.98284  0.621585  9.133850  1.637910   9.59557   2.7198300
## 6  2.68469  1.10565  2.177320  1.387450  1.767020  -0.20282   0.7362430
##       PC125      PC126     PC127    PC128      PC129     PC130     PC131
## 1 -0.712984   0.138293  2.179260  6.03463 -1.2302300  8.125010  4.019940
## 2 -8.047660   3.826760  4.767960 -3.71010  0.0897396 -3.211940  2.280000
## 3  3.550700 -13.411400 -2.037490 -1.88012 15.0846000 -0.590829  6.077670
## 4  0.783764   8.090510 -3.312490  4.13663 -5.4562900  9.836730 -2.596380
## 5 -0.642211   8.697430  2.798690 11.04680 -6.7470800 -8.821130  2.921880
## 6 -0.482564  -0.618477  0.197942 -2.13166  0.7780380  0.876103 -0.556115
##       PC132      PC133     PC134     PC135     PC136     PC137     PC138
## 1  3.207600   6.039150  1.770420 -2.976580  5.722390   9.32636   1.20594
## 2  3.606010  -2.140560  2.982800  2.894330 -0.468215   1.56876  -1.75236
## 3 -5.079250 -10.075700 -0.874715 13.116200  4.294810 -10.52870   8.22391
## 4 -2.785890  13.811600  1.911940  5.512510 -7.301110  -3.36717 -17.63690
## 5 -3.748820 -13.804300  0.530970 -6.411690 -1.320520 -10.35680   1.31892
## 6 -0.657234  -0.500192  3.447490  0.214201  0.189449   1.46786  -1.16552
##       PC139     PC140     PC141    PC142     PC143      PC144    PC145
## 1 -3.936410 16.697200   3.28420 13.08420 -3.691860 -11.328300 -1.21216
## 2 -2.301130 -0.411955   3.36125 -2.25490 -0.200263   1.258310 -2.04743
## 3 -6.910680 13.690800  -1.99014 -8.37847 -0.584530  -9.490540 -6.54467
## 4  3.855460  5.191490 -10.45740 -7.67927  5.319640  -0.540938  5.05316
## 5 22.110100  1.811170 -10.67750 -5.94812  9.515320  -1.534930 -2.17691
## 6 -0.817732 -1.910000  -1.11667 -1.32883  1.723810   1.022040  2.28221
##       PC146      PC147     PC148     PC149     PC150      PC151     PC152
## 1   4.03896  3.4457400 -3.070150 11.881300 -19.45000   5.623070  0.579008
## 2  -1.10344  3.7740800  1.051590 -0.883642   2.79289   1.600340  0.925971
## 3  -5.08406  3.5452200  6.780100  1.708210  -4.63673  12.203800 -0.483891
## 4   4.32733 -0.0750739 -1.516440  2.209780   5.57357 -11.001000 -2.278020
## 5 -11.70380  6.2777200 -5.020820  4.939670  -8.60653  19.327900 -5.683860
## 6   4.13200  2.2194700 -0.423547  0.484201  -1.43745  -0.935855  1.025520
##        PC153      PC154      PC155     PC156      PC157     PC158    PC159
## 1 -12.878000   1.033240   6.583560 -5.511640   2.589040  9.432300 -9.48998
## 2  -0.868274   0.937980   3.491970  1.037770  -3.169020 -0.586865  1.69525
## 3  10.101600  11.540300  -3.730950  6.569580  -8.149760  1.730820 -1.20677
## 4  15.152800  -0.495281 -21.212200  0.203603   1.680550 -2.091630 -2.92136
## 5  -9.435780 -15.492700   8.164590 -3.640940 -14.507700 -6.723570  3.23550
## 6   1.442670   1.980180  -0.817082 -3.189410  -0.441945 -4.215420  1.65176
##      PC160    PC161     PC162      PC163     PC164    PC165     PC166
## 1  1.48242  9.15575 -2.397780  -3.449520  0.964108 -2.44514 -4.120880
## 2  3.60080 -1.69537 -0.585215   5.308250  2.814770 -1.34442 -0.173893
## 3  7.78513 12.72490 -2.229330  18.424300  0.644631 15.09080 14.597600
## 4  6.66010  1.06526  0.763251  -6.127380 -6.183860  2.32320 -2.998020
## 5 19.65440 -8.62961  2.344090 -15.537600 -2.883270 11.85370 13.854700
## 6  1.03209 -0.41453 -0.983646   0.196306  3.261320 -2.42458  0.314184
##        PC167      PC168     PC169      PC170     PC171      PC172     PC173
## 1 -10.292100 -20.124500 -11.30100   6.344640 -9.368680  -9.728120  5.140010
## 2  -0.900305   3.355870  -3.16255   2.574450 -1.985830   0.575674 -0.566387
## 3   0.058153   9.812130  -7.17513 -17.631000  7.983690 -22.290300  6.855110
## 4   8.613030   0.182716  -1.71216 -13.120900  0.202328  -4.317990 -5.667270
## 5   9.073690 -16.238200  14.37750  11.516000 -5.280630   3.581040  9.958980
## 6   0.435482  -0.579278   1.11392   0.925707 -1.761120   1.946030 -0.497627
##       PC174     PC175    PC176     PC177    PC178      PC179     PC180
## 1  14.92830  3.119610 -1.34525 -2.755000  1.56895 -17.358100 11.031600
## 2  -1.62261  0.694167  4.16878  1.264910 -5.24309  -2.706600 -7.004110
## 3 -11.57420 -2.765520 -6.85045  6.161230 -1.45467   4.007950  6.536340
## 4  -3.85618  2.681660  3.88404  9.843810 10.11240   0.767019 -9.671400
## 5 -24.02330 -5.464220 -4.16733  2.513230 -3.23209  12.529500 -4.566630
## 6   1.67146 -0.215576  1.81306  0.744773  1.85090  -2.732590 -0.570049
##        PC181      PC182    PC183     PC184     PC185     PC186     PC187
## 1   1.426210  -2.489690  7.05421 26.189400 15.125300 -2.040030  8.853190
## 2   1.395950  -0.247367 -1.89733 -2.824270  3.304820  3.515820  2.020320
## 3   9.027280  -2.364400 -1.18158 -0.588440  0.900796  1.009520 -6.877080
## 4 -14.645300 -28.803600 12.04750  2.608000 12.728400  7.282120  9.027220
## 5  -4.395440   6.129290 -9.43538 -2.951500 -4.524190  0.728390 -1.945460
## 6   0.171567  -0.936474 -1.38738 -0.705347 -1.442620 -0.601111 -0.754935
##      PC188     PC189     PC190     PC191     PC192     PC193       PC194
## 1  3.12938  16.26000 15.275500  1.782970  3.106490 -1.823070   0.0535326
## 2 -3.35918  -5.69236 -1.393210 -3.975370  2.615380 -0.337273  -1.6417100
## 3  4.64530 -15.78190  9.423800  6.103940 -4.275900 -8.049880  -9.3616900
## 4 -5.91934 -12.61830 -5.278100  9.547440  1.186180 -4.708890 -10.4498000
## 5 -2.90288   2.97822 -6.992460 -0.699433 -6.657050  3.233220   3.5032700
## 6  2.90335  -3.10696 -0.884288 -3.230100 -0.619537  1.818470  -2.2591600
##       PC195      PC196      PC197     PC198     PC199      PC200     PC201
## 1 -3.263470 10.1692000   2.403610 -5.660060 -2.732850  7.8384500 -0.116071
## 2  3.887430  1.8286500  -0.522081 -1.813420 -1.187670  1.5394200 -0.753382
## 3 -0.441351  0.6416220 -20.740500 -3.670630  5.011280  0.0746461  1.994490
## 4 -2.037770 -0.4025780  12.728900  0.179160  3.776370 -5.4642600 -4.106600
## 5 -1.025930  3.6759300   7.863650  4.437090  2.708850 -3.2536500 -8.460940
## 6 -0.880249  0.0849061   1.934600  0.685482  0.757912 -2.6629600  1.759240
##        PC202     PC203      PC204     PC205     PC206     PC207    PC208
## 1  3.6195000 -3.707560  0.0158396  5.839080  2.229270 -11.10690  3.26447
## 2 -0.7886260  0.291429  1.5528800  3.604630 -7.745430   5.85148  1.09819
## 3  6.0006600 -0.272915 -1.8461100  4.245150  7.416220   2.76945 -1.41021
## 4 -4.8589000  2.895360  4.4053400 -4.043130 -4.733780   4.70966  3.95704
## 5 -1.7367400  2.460220  2.9588000 -6.624050  1.281920   1.03362  1.74956
## 6  0.0605942 -2.286350  0.6293900 -0.634392  0.914047   3.41830  1.46717
##      PC209    PC210      PC211      PC212    PC213     PC214      PC215
## 1  1.18134 -2.69620 -1.6968100  5.1375900  4.85852 -0.597896 -2.6557600
## 2  2.63148  0.99805 -5.2046200  0.3573960  4.80028  3.544460  4.8880300
## 3 -1.67602  3.08015 -0.0645954  0.9254070 -7.90074  2.591360 -4.0078500
## 4 -4.74794  3.56340  0.6744180  2.0860900  6.77795 -1.808880  4.4124700
## 5  3.43979 -7.08848  1.0160100 -4.9488000 -2.88778  2.243840 -0.0372929
## 6 -2.69538 -2.44311 -1.7924500  0.0184598  1.20241 -0.671993 -1.5522000
##      PC216     PC217     PC218     PC219     PC220     PC221     PC222
## 1  4.49055  0.637331 -2.365800  1.949140 -5.035390 -1.548830 -0.838372
## 2 -3.86744 -3.500280  0.808693  0.195691 -8.131690  4.254230  8.771050
## 3 -2.61305  1.392310  0.314417 -2.420570 -2.270040  1.684090 -0.530963
## 4  1.00933  2.606290 -0.528731 -1.362910  1.269370 -0.508426  0.563710
## 5 -3.48988 -2.834740  1.543290 -1.324410 -0.419488 -1.843760 -0.369917
## 6 -2.04589 -0.786415  2.031060 -0.669019 -0.880694 -0.589158 -1.018700
##       PC223    PC224     PC225      PC226     PC227     PC228       PC229
## 1  0.954695  2.45351 -0.737650   0.415769  0.785759 -0.651546  1.36670000
## 2 18.281000 12.17320 16.524000 -26.419500  8.788070  5.993790 20.42990000
## 3  0.409464  1.66778  0.934650  -2.862440  1.102900  0.689047 -0.00841803
## 4 -0.886546  1.23434 -4.179950   0.117267 -0.187612 -1.101560 -1.69233000
## 5  1.443750  1.37715  0.159855   0.977391 -0.696971 -0.191525  2.62075000
## 6 -2.979680 -1.21344  1.280080   1.205850 -0.393115 -1.905210 -0.98067600
##       PC230       PC231      PC232     PC233     PC234     PC235      PC236
## 1  1.334460  -2.3155100  -1.345160  2.042100 -2.138660 -2.404690  0.4979810
## 2 -3.639970 -13.8845000 -18.269700 -1.052180 -5.245630 -2.464180 -7.2019100
## 3 -3.931810  -1.3872000   3.465830  1.637310  2.006790 -1.720370 -1.4247600
## 4 -0.240021   1.1555300  -1.110210 -0.914095  0.403332 -0.452953  0.0840436
## 5 -0.862870   0.6809950  -0.584831 -1.733090  2.094720  2.020550  0.2379360
## 6 -1.438750   0.0388905  -0.319700 -0.722855 -0.594278  1.125490 -0.2412180
##       PC237     PC238      PC239     PC240    PC241      PC242     PC243
## 1  1.132170  0.828952 -1.0402100 -0.534313 0.166768 -0.1861520 -0.853036
## 2 -0.560467 -0.238514 -0.5820710 -5.140480 1.185240 -3.8792300 -2.995210
## 3 -0.648752 -0.296555 -0.3478960 -1.963030 1.805890 -2.2796300 -1.737720
## 4 -0.382944 -0.430719 -1.1353500 -0.777930 0.475309 -2.5282900 -0.684651
## 5  0.651394  0.744220  0.0405264 -0.109277 2.219380 -0.0243305 -1.720880
## 6 -0.623646  3.208910  0.9541860 -2.308850 0.952111 -0.9234700 -7.028690
##       PC244     PC245     PC246     PC247     PC248      PC249        PC250
## 1 -0.549459 -1.676700  1.442640  0.388364 -1.585570  0.0158327 -2.068450000
## 2 -3.520200  3.064050 -4.494790 -2.666930 -0.398643  1.3191100 -1.880360000
## 3  0.908814 -0.526994  0.646026  1.772390  0.192232 -0.2334130 -1.445990000
## 4  0.905766 -1.366530 -1.241710 -0.480329  0.339002 -0.0926394 -0.000228799
## 5 -3.184300 -0.604245  0.766963  1.316780 -0.886905  1.7635700  0.218820000
## 6  0.404846 -1.935070 -7.452530  0.947396 -3.893770 10.5260000  3.845080000
##        PC251     PC252     PC253     PC254     PC255        PC256      PC257
## 1 -0.0909045 -1.286410 -1.476590  0.445361 -0.454673 -1.279170000   0.616078
## 2 -0.1960620 -2.804130  0.797743 -2.514610 -2.363580 -0.095094100  -2.614910
## 3  0.0338227  0.516131 -0.875077  0.272255  0.168574 -0.119842000   0.553969
## 4 -1.1658700 -0.141914  0.708533 -0.702764 -0.613669  0.000595737   0.639629
## 5 -0.1425980  1.286430 -1.024210  1.305400 -0.363115  0.340160000   0.209637
## 6  0.6705600 -1.752760 -0.262442 -1.252340 -8.706210 -2.363270000 -20.676100
##         PC258      PC259      PC260     PC261      PC262      PC263     PC264
## 1 -0.47031100  0.2438470 -0.5043690  0.226510 -0.4418150 -0.8708950  0.799262
## 2 -1.90755000 -2.1152500 -1.0091900  0.545371 -1.4976800 -0.0525219  0.423708
## 3  0.00727958  0.7054270 -0.9570060  0.957102  0.7390040  0.1333230 -1.466320
## 4 -0.34952200 -1.0184900 -0.0294914 -0.571801 -0.0639744  0.5144190  0.160258
## 5 -0.66157300  0.0231993  0.5241020  0.778208 -1.0389300 -0.4784070 -0.991843
## 6 38.03930000  1.1037000 -5.4512800  0.209399 -3.9660400 -3.2770000  5.910630
##        PC265     PC266      PC267      PC268     PC269       PC270     PC271
## 1  0.3961940 -0.365417 -0.1131220  0.0399503  0.318304  0.42435600  0.427372
## 2 -0.0778169 -0.693591  0.7942950 -0.5593870 -0.252321 -0.72199200 -0.143000
## 3 -0.9421380 -0.734953  0.0518940 -0.3157790  0.119161 -0.08887840  0.474355
## 4  0.1291820 -0.220107 -0.0933386 -1.0670800  0.603698 -0.00707304  0.595090
## 5  0.9881800  0.161395  0.3287930  0.5240300  0.365007 -0.28078300  0.133682
## 6  4.4098800 -1.617270  2.4468000  1.9637500  0.818310  0.98260300  0.898526
##        PC272      PC273     PC274      PC275      PC276      PC277      PC278
## 1  0.8898670 -0.5320180 -0.202373  0.1221180 -0.0494071  0.1407310  0.2934410
## 2  0.1501440  0.1468680 -0.118874  0.3131190  0.4881750  0.7204670  0.0129602
## 3  0.4355940 -0.1591700 -0.349746 -0.2655820 -0.4097860  0.2029200  0.2233030
## 4 -0.3921040  0.0654559  0.327395 -0.0158294  0.5578660  0.3354980 -0.2985050
## 5 -0.0447267 -0.8947430  0.194515  0.2834500  0.5954340 -0.0214011 -0.2369820
## 6  0.9333370  0.5134990 -1.294610 -0.5516730 -1.3518900  1.9478600  1.0588200
##       PC279      PC280      PC281      PC282       PC283      PC284      PC285
## 1  0.378642 -0.2942070 -0.0961943 -0.4393570 -0.00963723 -0.1411430 -0.3151240
## 2  0.262467  0.4056820 -0.7283000 -0.5717360 -0.41356500  0.0274373 -0.1701970
## 3  0.338803  0.0757141 -0.1722910  0.4152430  0.15766400  0.6326340  0.0471803
## 4 -0.687666 -0.6938590 -0.1059360  0.0656018  0.69531300  0.0811386  0.2997780
## 5  0.288014 -0.2440200 -0.1515450  0.6560290 -0.03491080  0.3181510 -0.1604810
## 6 -0.865197  0.0494245 -0.3556250 -0.5837550 -0.13014400 -0.3389760 -0.2589450
##         PC286       PC287 Individual  Pop_City Country Latitude Longitude
## 1 -0.00890661 1.37433e-06        266 Bengaluru   India  12.9716   77.5946
## 2 -0.07037630 1.37433e-06        255 Bengaluru   India  12.9716   77.5946
## 3 -0.00793918 1.37433e-06        256 Bengaluru   India  12.9716   77.5946
## 4 -0.22101200 1.37433e-06        265 Bengaluru   India  12.9716   77.5946
## 5  0.07046310 1.37433e-06        261 Bengaluru   India  12.9716   77.5946
## 6  0.01113450 1.37433e-06        262 Bengaluru   India  12.9716   77.5946
##   Continent    Year     Region Subregion order order2 orderold
## 1      Asia Unknown South Asia              60     52       52
## 2      Asia Unknown South Asia              60     52       52
## 3      Asia Unknown South Asia              60     52       52
## 4      Asia Unknown South Asia              60     52       52
## 5      Asia Unknown South Asia              60     52       52
## 6      Asia Unknown South Asia              60     52       52

6.4.2 Create PCA plots for Brazil, Sicily + native range

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_brazil_sicily_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.4.3 PC1 & PC3 plots for Brazil, Sicily + native range

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_brazil_sicily_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.5 PCA for Iberian Peninsula SNP Set 3 (MAF 1%, R2<0.01)

6.5.1 Import the data for SNP Set 3 subset for native_iberia_Americas2

genotype <- here(
   "euro_global/output/neuroadmixture/native_iberia_Americas2.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 340
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 340
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BAR BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN 
##  12  12  12  12  12   2  12  12   4   7  12  11   4   2   6  12  11   6   4   4 
## LAM MAT OKI PAL POL POP QNC REC SON SPB SPC SPM SPS SSK SUF SUU TAI UTS YUN 
##   9  12  12  11   2  12  11  11   3   8   6   5   8  12   6   6   7  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   331
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   331
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.removed file, for more informations.
## 
## 
##  - number of detected individuals:   331
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.lfmm"

PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          331
##         -L (number of loci)                 22537
##         -K (number of principal components) 331
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            native_iberia_Americas2.pca/ 
## input file:                      native_iberia_Americas2.lfmm 
## eigenvalue file:                 native_iberia_Americas2.eigenvalues 
## eigenvector file:                native_iberia_Americas2.eigenvectors 
## standard deviation file:         native_iberia_Americas2.sdev 
## projection file:                 native_iberia_Americas2.projections 
## pcaProject file:                   native_iberia_Americas2.pcaProject 
## number of individuals:           331 
## number of loci:                  22537 
## number of principal components:  331 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          331
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US.rds"))
head(sampling_loc)
##     Pop_City  Country  Latitude  Longitude Continent Abbreviation Year
## 1 Berlin, NJ      USA  39.79081 -74.929100  Americas          BER 2018
## 2 Palm Beach      USA  26.70560 -80.036400  Americas          PAL 2018
## 3 Recife, PE   Brazil  -8.05882 -34.878100  Americas          REC 2017
## 4   Gravatai   Brazil -29.93760 -50.990700  Americas          GRV 2018
## 5   Penafiel Portugal  41.18555  -8.329371    Europe          POP 2017
## 6      Loule Portugal  37.09084  -8.092465    Europe          POL 2017
##            Region   Subregion order order2 orderold
## 1   North America                 1     NA       75
## 2   North America                 3     NA       77
## 3   South America                 7     NA       81
## 4   South America                 8     NA       82
## 5 Southern Europe West Europe    11      3        3
## 6 Southern Europe West Europe    12      4        4

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 39 Levels: BAR BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 39

Check the regions

unique(sampling_loc$Region)
## [1] "North America"   "South America"   "Southern Europe" "East Asia"      
## [5] "South Asia"      "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1       PC2     PC3     PC4      PC5      PC6     PC7
## 1        BAR 25.5499  2.001850 32.4190 7.67455 -25.4593 -4.67251 13.7777
## 2        BAR 25.4107  0.354687 31.4114 5.91305 -20.6103 -7.52188 21.6435
## 3        BAR 21.9004 -1.111830 31.3627 5.46695 -22.3339 -6.51403 19.6255
## 4        BAR 22.5679  1.277120 33.8145 6.21831 -25.4711 -6.89546 20.8575
## 5        BAR 24.4462 -1.403420 29.1115 6.30322 -19.0357 -5.08700 14.6359
## 6        BAR 25.3203  0.761056 34.6690 7.37388 -25.1849 -6.13724 17.6580
##       PC8       PC9      PC10     PC11    PC12     PC13    PC14    PC15
## 1 3.40690 -0.733271  -9.67442 -15.9058 18.0216  6.32654 5.98767 3.41399
## 2 5.84533 -2.214230 -12.85460 -19.9574 17.3894  6.11969 7.98925 1.07950
## 3 9.17413 -2.425720  -9.48653 -17.5797 18.6901  9.58027 5.83143 2.08171
## 4 8.02944 -3.644450 -12.77820 -19.6965 21.8319 10.70570 6.00047 5.04656
## 5 2.53529 -1.993170  -7.14319 -17.3004 14.1459  7.36393 5.16066 4.08381
## 6 8.71096 -1.058250 -12.28910 -19.4578 17.8304  7.96268 7.49455 2.73509
##       PC16     PC17    PC18      PC19      PC20      PC21      PC22      PC23
## 1 -7.16208  7.71709 4.41331 -2.754290  1.854080  2.129810  0.884887  1.293300
## 2 -6.41903  7.30044 5.32912  3.931020 -1.013790 -0.234323 -8.631420 -2.862340
## 3 -3.89952 11.34300 4.84956  2.312330 -3.679250 -2.301880 -6.299350 -0.498285
## 4 -4.11928 10.77560 8.72978  1.012840 -0.646801 -2.926890 -7.660640 -0.725622
## 5 -3.15545  6.72153 4.27435  0.619417 -0.920164 -3.943620 -5.897880 -3.275340
## 6 -9.46860 11.47150 4.60506 -2.486380  0.665044  3.262860 -2.738350  2.407820
##       PC24     PC25     PC26      PC27       PC28      PC29      PC30      PC31
## 1 -2.23555 3.980410 0.479427 -0.130636  0.0865897 -2.361130  0.557199  0.316978
## 2  2.34082 2.598980 0.134286  2.492170  4.3267900 -4.631260  1.770090 -0.939229
## 3  5.09459 0.749207 0.836163 -1.730430 -0.4442760 -2.515980 -0.942403  4.041180
## 4  3.40329 2.569730 4.961780 -4.176320 -0.1042140 -0.131276 -0.999407 -1.027160
## 5  1.23383 1.401020 2.345880  4.706740  3.5340900 -3.955960 -2.782230  2.525790
## 6  4.66316 4.781560 0.827205 -0.737351 -1.4653200 -3.554900  3.813600 -0.341777
##         PC32     PC33     PC34        PC35      PC36     PC37      PC38
## 1 -0.4444750 -3.15818 -2.83666  4.82432000 -0.997399  1.21471 -5.746860
## 2  3.5647700  1.54800 -3.88493  9.35334000 -1.017370 -6.12165  6.997830
## 3 -2.4151800 -1.41287  4.46832 -7.77748000 -1.808960  8.18154 -0.910979
## 4  0.0760186  3.24226  2.29609 -2.64337000  4.722200 -1.96688 -1.398860
## 5 -1.2545300  2.59171 -4.77755  0.00356112  5.917210 -4.51056 -3.554590
## 6 -1.9315000  2.22768 -1.22077 -2.63133000  0.270552  6.88953 -4.599620
##         PC39       PC40      PC41      PC42       PC43     PC44      PC45
## 1 -0.0614424 -10.545000  7.883870  6.857250 -3.2807000  2.47440 16.497700
## 2 -3.3119900   0.578622  2.470320 -6.460780 -4.1464900  2.14022 -7.894510
## 3  4.8478600   1.088610 -3.357980 -0.298669  0.0652313 -1.38465 -0.620771
## 4 -0.8465660   9.940250 -8.551070 -4.867380  5.7667200 -7.29192 -7.406230
## 5 -3.6516100  -4.192260 10.120500 -1.745520 -0.0524046  4.05158 12.631500
## 6  0.9997290  -2.663760  0.210486  3.010410 -3.9270600  3.11524  3.306900
##       PC46      PC47     PC48     PC49      PC50      PC51      PC52       PC53
## 1  8.22976   6.59451  8.11986  5.01540  -4.40843 -0.740971 -15.62990   9.878280
## 2  3.88213  -6.63139 -3.31186 -9.77056   8.51794 -2.449940   7.63387   4.434340
## 3 -8.34699 -11.30220 -7.68358 12.71820 -20.46040 -2.483860 -21.09400 -21.488000
## 4 -7.00947   6.08540 -7.22786 -7.66642  11.90510  2.748040  16.59100 -10.143900
## 5  5.70632  20.08740  3.50690 -4.87685  18.25830  5.788220  13.66970  16.963500
## 6  5.80395  -7.31314  2.43073 11.88560 -19.00440  0.178450 -13.99530   0.764454
##        PC54       PC55      PC56      PC57     PC58       PC59     PC60
## 1  -5.57121 -15.027800 -13.90860 -4.094810  4.21689 -10.382200 -4.29901
## 2   3.36386  -4.893800   2.43052  4.229120 -5.20076   1.254800 -8.31485
## 3   3.95256   6.789440   1.51764 -5.220900  8.97183   0.382508  7.51565
## 4   5.02958  18.559000  15.71960  3.405140 -6.72767   7.536590  9.78866
## 5 -15.53990   0.428366 -12.44660 -2.525550 -6.14423  -4.232770  6.26329
## 6   2.29198 -10.959400  -3.82909  0.495758  5.25124   3.543490 -6.27881
##          PC61       PC62      PC63      PC64       PC65       PC66      PC67
## 1 -1.09797000  -4.804900 -0.156622  0.683491  -2.260210   4.234910 -3.649060
## 2  0.00490617  10.205300 -0.668617 -8.417400  -1.931710   3.357220 -5.219050
## 3  1.20415000  -0.623453  0.974949 10.128200   4.281520 -10.625900  9.376110
## 4  3.38923000  -4.474070 -3.546520 -1.106340  -0.252152  -3.496220  1.515490
## 5 -1.17010000 -16.055000  0.699717 -2.160300   2.908200   2.509440 -0.379699
## 6  0.01844000   5.739770 -9.486850  2.533710 -10.111700  -0.978215 -6.956280
##        PC68      PC69      PC70      PC71      PC72      PC73      PC74
## 1 -1.574830 -5.243250 -1.403580 -12.63960  1.356470  -8.12799  0.859371
## 2 -0.278632  0.894293 -3.828950   5.62895 -1.293390   1.69540  4.779710
## 3  8.166690 -4.691220  3.184360  -9.13291 -0.734180   4.67734  2.345690
## 4  0.836239  8.973690 -6.029360   5.15928 -2.319940   4.61097 -1.591360
## 5 -2.314620 -3.092170 10.263800   3.76500 -4.196360 -18.39850 -3.893150
## 6  8.467670 -3.522660  0.544784 -13.40720 -0.114845   1.46456 12.050100
##        PC75       PC76     PC77     PC78     PC79     PC80     PC81      PC82
## 1  6.572720  -1.970600 -6.48878  2.12883 -8.44218  2.83437 10.09980 -6.658690
## 2  0.289025 -10.742600  9.31079  2.62550 -5.67173  0.65423 -9.82558  6.597310
## 3 -4.700500   0.240791 -9.46227 -1.55213  2.30165  5.40084  1.85246 -0.882662
## 4 -3.918090   4.068350 -4.40486 -2.23271 12.17960 -2.56363 -9.31355  0.260666
## 5  4.600820   9.808770  4.18361  1.28728 -7.80091 -5.49614 10.93040 -3.438530
## 6 -5.969230 -10.239100 -4.05508  2.52897 -6.26424 -6.24197  2.27204 -0.564218
##       PC83       PC84      PC85     PC86     PC87      PC88      PC89      PC90
## 1 -2.86887  14.001600  0.957331 -1.87941  4.64860  0.657698  0.399883  2.022860
## 2 -2.41667   0.050327  2.596270  1.67471 -2.48395 -1.396320 -4.257030 -2.291260
## 3  2.37558  -2.717030  0.846448  3.65477 -3.07108 -2.243730  3.539840 -0.099632
## 4  3.51192 -10.564200 -0.569494 -2.68905 -2.09208  5.139930  6.494860  5.454050
## 5 -2.26074  13.791700  3.296180 -5.23505  8.30755 -4.069930 -6.242270 -4.136490
## 6 -2.48147  10.155800  0.622089  4.60028 -1.74139  5.396000  2.946920 -2.739450
##        PC91      PC92      PC93      PC94      PC95     PC96      PC97
## 1 -7.621980  0.347251 -7.679630 -4.570580   6.94600  2.55569 -6.281100
## 2  6.124300 -6.053970  5.249970  9.215560   1.07701 -3.67124 -7.650710
## 3 -0.821424  6.222880 -5.285170 -1.756490  -2.14933 -1.85051  0.284107
## 4  0.316467 -1.141910 -3.056630 -1.073980 -10.11590  3.00487  5.355180
## 5  3.806260  0.314570  0.591955 -9.671800  14.58320  2.33464 -1.780890
## 6 -3.593550 -4.794020 -7.208490 -0.857269   1.25426 -2.20382 -2.009490
##        PC98      PC99     PC100     PC101     PC102     PC103     PC104
## 1 -10.40770 -2.110000 -9.424470 -1.839250  1.535390 -5.510170 -9.217160
## 2   1.88745 -0.834114 -2.570220  0.206347  4.293290  0.302222  1.592700
## 3   1.58134 -1.053580  6.435840  5.676030 -3.411820 -0.304258 -0.429984
## 4   8.50590  1.367650  0.608194  1.623170 -1.206740  7.056930  3.993230
## 5  -6.29561 -4.134710 -1.820040 -6.111940  0.997718  0.208238  4.110780
## 6  -2.58204  3.409730  0.152236  1.872320 -0.201032 -4.711020 -6.356940
##       PC105    PC106     PC107     PC108    PC109     PC110    PC111     PC112
## 1  6.079390  1.64507  5.043130 -1.280310 -5.12226  2.400870 -2.93166 -2.596930
## 2 -6.858940  4.49297 10.819200  1.935290  1.18597 -3.354460 -3.12687  5.176590
## 3  1.402610  5.83262 -8.318620 -0.867838  4.82371 -2.783900  0.98931 -0.873792
## 4  0.540892 -8.13902  2.633320  2.897030 -2.00770  4.063370  5.14743 -4.799910
## 5 -2.495470 -2.72289 -0.883392 -1.856300 -3.07880 -1.573520 -5.40047  3.153300
## 6  2.410270  3.76763  0.650305  5.733720 -1.65211 -0.793212 -1.72445 -6.884940
##        PC113    PC114     PC115     PC116     PC117     PC118     PC119
## 1  2.2578800  3.88597  2.867630  0.539158 -0.845930 -5.403780  2.784410
## 2 -4.3606400 -0.92686 -7.133760 -4.580270 -3.141370 -2.555780 -6.057470
## 3 -0.0760097  4.41340  2.063490  4.869730  1.861820  5.288130 -0.360741
## 4 -2.7829100 -2.09383  1.375930 -2.798820 -0.927025 -0.118288  8.700330
## 5  7.6067200  3.72398  2.286540  3.436420  1.227490  0.349492  0.649339
## 6 -0.9041910 -2.10562  0.943164 -0.286257 -4.915870 -1.528040  2.788780
##      PC120    PC121     PC122    PC123    PC124     PC125     PC126    PC127
## 1 -4.91608  2.93566 -8.418820  3.80704 -1.52167 -5.461930  2.234070 -2.08119
## 2 -5.82860  2.04339  7.237420  1.77367  5.72487  4.807260  0.455513 -4.55348
## 3  1.93994 -6.71502  5.527480 -1.24756 -6.20322 -2.739320 -1.555350  2.43503
## 4 -4.21144 -2.04098 -5.836880 -6.76454 -2.89093  0.808653  0.486359  3.37248
## 5  6.23956 -3.65780  9.455560  1.56276  2.35529 -6.507030 -3.885540 -1.85520
## 6 -6.83929  3.39949  0.274695 -3.51281 -1.19905  5.340580 -0.898033 -5.43983
##       PC128     PC129     PC130     PC131     PC132      PC133     PC134
## 1  -2.68327 -2.549630  9.401090  0.683652 -1.271010 -11.416600 -1.922070
## 2   9.75043 -0.917970 -0.372633  1.562830 -1.881060   4.285940  4.032880
## 3   2.38452 -5.374380 -3.196170 -2.384930  2.686380   4.526360 -2.830820
## 4 -11.87540  4.469200 -7.035340 -3.212530 -3.742870   8.108840  2.652020
## 5   4.62774  0.716657  1.085520  3.138260  0.723081  -5.909650  0.225585
## 6  -2.55966 -3.140090  3.077420 -0.500853  3.364680   0.357091  5.086970
##      PC135    PC136     PC137    PC138    PC139     PC140     PC141     PC142
## 1 -4.87730  2.26513  0.262335  4.62478  5.99020  4.339540  0.189673 -3.939540
## 2  8.17511  1.93598  5.514280  1.03692 -1.33778 -6.677280  1.741950  0.921766
## 3  4.24522 -5.15313  0.201977 -6.31506 -1.37553  1.129400 -5.531570  3.147510
## 4 -5.15336  2.21695 -2.338480 -5.61432 -8.98571 -1.644590  4.369480 -1.193880
## 5 -2.41895  3.90826  3.841930  4.90093 14.33500  0.260579 -5.722460  7.555040
## 6  2.18485 -3.69796  0.569709 -1.83564 -6.22962  5.775860 -4.806010 -6.111940
##       PC143     PC144     PC145     PC146     PC147    PC148     PC149    PC150
## 1 -0.699479 -2.504360 -2.254420 -3.395470   4.77453 -3.15509 -1.713990  3.44239
## 2 -2.513140 -0.844755 -3.618760 -3.942080   5.65364 -5.38484 -3.114800 -4.41353
## 3  5.840610 -1.219760 -8.340430  5.265660   3.93128  3.53840  5.863480  7.40169
## 4  2.002100 -1.773520  5.916870 -2.958120 -11.15000 -1.58413 -5.806020 -5.48468
## 5 -5.756550  4.389890  4.857280  6.174350  -4.38367 -1.28658  0.225397 -2.82186
## 6  4.348170  0.796501 -0.205014 -0.827709  -4.34729 -5.21046 -1.004390  4.42634
##       PC151    PC152     PC153     PC154    PC155    PC156     PC157     PC158
## 1  2.118480  4.28212 -0.317366 -1.457550  1.15876 -1.83845  0.677561  2.787140
## 2 -4.782600  4.04689 -0.657632  3.414950  3.59248  1.42896  0.855985  0.960586
## 3 -0.398366 -1.25384 -2.677050  0.337193 -4.41156 -3.83077 -0.899710  0.143709
## 4 -4.051750  3.49544  9.373580 -3.702670  2.50726 -3.59417 -3.470970 -2.842710
## 5  0.403745  1.09411 -5.127360 -6.036350 -5.27839  7.88031  0.981945 -4.104420
## 6  2.459710  1.82108  3.974530 -0.626101  2.53644 -6.06100 -0.463329  2.182540
##      PC159     PC160     PC161    PC162    PC163    PC164     PC165    PC166
## 1  2.61953  2.776970  6.499190 -3.17932 -1.26286 -5.81216 -3.177150 -1.86669
## 2 -2.04183 -3.528650 -5.698820 -4.64500 -4.59376 -5.62974  2.897230 -1.27826
## 3 -4.13686  0.359589  2.721880  2.89826  2.82899  7.24960  0.378805 -6.48326
## 4  2.79463 -1.328860 -0.184743  1.14260  1.50184  4.79097 -4.914690  8.68172
## 5  1.95661 -2.774440  1.031620 -0.31542  5.01982 -1.95284 -2.344740  8.01955
## 6 -2.27209 -2.122670 -2.977810 -1.96589  3.56997  3.67382  7.778470 -1.85858
##        PC167     PC168     PC169     PC170      PC171     PC172    PC173
## 1 -0.0803253  2.848890 -2.162710  4.967950 -3.9800100 -4.861580 -1.59772
## 2 -3.7661300 -3.614570 -4.534350 -0.843756 -5.4733700  7.792640 -1.93847
## 3 -0.6931030 -1.764910 -1.017960 -3.551750 -2.6722700 -0.845494 -4.76455
## 4  3.4457900 -4.300140  5.789650 -0.429008  0.0647389 -7.637510 -1.81966
## 5 -0.7328930  0.499075 -0.141274  0.987371 14.8210000  5.370970  1.26637
## 6  4.7712000 -0.928789 -2.991940 -4.036390 -5.7398300 -5.186370 -3.25269
##      PC174     PC175     PC176     PC177     PC178     PC179     PC180
## 1  9.62203  3.794880  7.310290  1.446480  3.169850  2.729350 -5.314270
## 2 -2.25811 14.894800 -5.213880 -4.870530 -2.762820  4.817930  3.516820
## 3  5.99955  2.589690 -1.611290  7.348660 -0.456571  2.428020  4.821800
## 4 -7.29503 -9.533150  0.415911 -8.096380  2.315460 -2.030240 -4.813670
## 5 -4.08710 -4.051960  2.816590  0.522548  5.103060  0.550644 -0.870163
## 6 -4.15292 -0.687135 -0.908193 -0.298223  2.353910  2.230670  3.742080
##        PC181    PC182     PC183      PC184     PC185    PC186     PC187
## 1  3.4773000  4.92234 -0.658278  4.7902500  0.790756  5.81346 -1.153170
## 2  0.0253924 -6.73912 -0.494408  1.9940900 -4.615020  6.09937 -0.101715
## 3 -5.7484900 -2.87637  2.330840 -2.9863900  2.284260  5.20689 -2.454970
## 4 10.4513000  7.54592  3.610410  6.0622700  3.310300 -6.91275  1.656040
## 5 -1.4731600  1.22017 -6.339070 -7.6392100 -4.212780  1.59322  0.115176
## 6 -1.2248000 -2.79284 -2.296190  0.0286201 -3.590650  3.76164  1.137250
##         PC188     PC189    PC190     PC191     PC192      PC193    PC194
## 1   4.5866100  0.486935  3.98583   1.18599 -0.239188 -7.5906200 -1.54424
## 2 -10.3407000 -2.671190  4.26308  -3.43792  2.857570  1.0492400 -3.08940
## 3   0.1671920 -6.557360 -9.36124  10.18680  1.382550  0.0706454 -4.30834
## 4   4.4596000  1.430970  3.55802 -11.40820  0.961035 -0.1869930 10.67490
## 5  -7.9391000 -3.556150 -2.03019  -0.64403 -6.336980 12.7351000  2.46297
## 6   0.0759437  3.128770  2.74974  -8.75257 -3.094980 -6.5699700  4.72646
##       PC195     PC196    PC197     PC198     PC199       PC200     PC201
## 1  0.910948 -8.042340  5.23727  8.172020   1.74364   7.4632400  10.90750
## 2 -5.426750  0.130371  7.72826 -6.550310 -11.48980 -13.9436000  -8.64064
## 3  6.408280  4.563530 -4.61863 -6.578520   5.92747   0.0228253 -10.26530
## 4  1.673040  5.761970 -4.43693  0.321186   1.10595  -4.3009700   8.66860
## 5  1.472540 -5.483900 -6.46014  5.276940  -3.83060  -4.6839400   1.42336
## 6  3.555810  6.871120 -4.15241 -8.280110   1.35386   4.7200800  -9.43587
##        PC202     PC203    PC204     PC205     PC206     PC207     PC208
## 1 -2.2669100  3.741520 -6.90213  1.521190  0.548047   1.04816  7.438350
## 2  1.6462600  9.763130 -2.57959 -2.844000 -1.215910  -2.81616 -3.145650
## 3 -0.0215309 -6.618470  6.54731  9.394450 -3.523440 -10.66750  6.256250
## 4 -6.2581700 -0.318148 -5.49795 -6.739860  8.748370   5.30643 -0.278222
## 5 -1.1527600  4.195740  3.75332  8.709440  0.724445   1.80931 -4.247090
## 6 -1.9675900 -2.659990  2.69489 -0.196236 -1.345850  -1.33835 -3.777190
##        PC209     PC210     PC211    PC212     PC213     PC214     PC215
## 1   1.079830  0.804993   5.95026 -1.61042  3.547410 -0.497208  -2.09902
## 2  -3.828520 -3.319220  -3.68087  3.30502  0.411716  1.725200  -9.77083
## 3   3.878940  6.145040   6.15633  4.76610  2.528460  1.723470  11.92680
## 4  -3.133370 -0.159137  -1.99040  1.98730 -3.919200 -1.113520 -12.08330
## 5 -10.947600  2.338610 -11.36970  1.54996 -2.101040  2.795130   6.68741
## 6   0.730206  1.055790   4.48507 -3.09285 -3.694160  2.946190  -3.76150
##       PC216     PC217    PC218    PC219     PC220     PC221      PC222
## 1  -1.93799  2.137280  1.49136 -6.35979 -1.999650  6.460560 -10.585700
## 2  10.25870 -0.681566 -8.56728 -2.77335  0.664778  7.269180  -5.625950
## 3  -1.91891 -4.036690 -5.57953  4.47943 -3.644230 -1.642510   0.177161
## 4   0.40950 -2.822450  1.48543 -6.64756  3.153860 -0.323252   1.453790
## 5 -11.59720 11.862400  5.37617  6.33545 -6.907660 -4.249690  -3.328230
## 6  -1.94337 -4.715090 -1.19449  3.98731  7.462940 -8.265040   7.105690
##       PC223    PC224    PC225      PC226      PC227      PC228     PC229
## 1   6.26030  5.75470 -6.84284  11.715700 -5.6183400  0.2430480 -0.704013
## 2  17.91000 -7.41692 23.08070   0.976779 -5.1448300 -2.3510100 -0.273485
## 3 -14.50860  6.04174 -3.54294   1.507420  2.9864500 -0.8730820  4.071330
## 4  -2.47360 -5.11154 -2.27466  -2.801570 -2.3035100 -0.0670175 -9.115460
## 5  -1.17638  2.57996 -9.42025  -1.487250  6.2886600  3.8361400  0.896267
## 6  -3.81264 -3.02251 -3.97208 -11.323300  0.0969562  5.2164200 -1.921160
##      PC230     PC231     PC232     PC233     PC234     PC235    PC236     PC237
## 1 -6.11467 -2.251420  6.120510 -2.641430  1.507590  -4.00376 -6.11933 -0.933163
## 2 -4.48263  1.855160  5.160780  5.599340 -4.713290   6.16179 -2.42093 -2.878430
## 3  7.98041 -0.923137 -0.136223  0.705681 -6.179890 -10.17500 -1.47485  3.840300
## 4  0.81340 -1.179330  0.886945 -2.777520  4.520280  -1.01872  7.61043 13.094500
## 5  4.38866  3.506720 -8.013210 -2.897140 -1.386990   1.37174  6.54433  3.476010
## 6 -2.58047 -1.612530  6.915120 -4.707690 -0.449379   5.55076 10.31380  0.237316
##       PC238     PC239    PC240      PC241    PC242     PC243      PC244
## 1  21.10460 -15.26040  2.19886 -0.0960591 -3.26182 -18.25860  -4.793180
## 2 -14.65680  11.68660 -2.92870 -6.7929100 -3.74096  -3.30562   0.578432
## 3 -12.73490   1.53312  8.05155  1.6704700 -9.82794  -3.79779  -5.898550
## 4   6.76285   6.71659 -2.03321  7.6123200  9.25779   6.91597 -10.802400
## 5 -17.45570   5.53179  1.85606  0.4529880  3.55132  21.33230  -6.126410
## 6  -2.81911   6.30187 -3.38643 11.3769000 26.88920   5.01527   6.718160
##        PC245     PC246      PC247    PC248      PC249     PC250     PC251
## 1   9.825920  0.368498 -10.421100 -3.01939   4.934450  5.364350   3.21276
## 2   0.249513 -3.860660  -6.924750 -7.77055   6.010400  1.173970   9.52594
## 3  -1.940680 -1.843600   0.517222 11.53290  12.955700  8.033140  23.63580
## 4  13.227400 -5.330850  -1.493860  7.58503  -3.396850  8.558150  14.18900
## 5 -12.008000  2.958860   3.197090  7.69150   0.490028  5.073940   4.34951
## 6 -14.440200 -1.294670   0.980852  3.58781 -12.775300 -0.912472 -22.90500
##      PC252     PC253      PC254     PC255    PC256      PC257     PC258
## 1 18.37280   6.08839  -5.178480  -1.77009 -1.49813   0.215559 13.878100
## 2 -1.37248   1.89693 -18.117400   3.58664 -9.25055 -17.999100  9.275970
## 3 -5.39199 -10.17820  -9.378100 -11.21190  4.04999  -5.659700  7.364760
## 4 13.28860   0.77435  -0.941677 -10.06840  3.86081   4.607560  6.790750
## 5 -3.71363  -3.86821  -8.100050  -7.65747 -4.77171   6.838140  0.779761
## 6  5.60337  10.07590 -14.518000   1.70536 -8.48897   6.001850 -2.653630
##      PC259    PC260    PC261    PC262     PC263    PC264     PC265     PC266
## 1 1.366520 -1.67545 12.12620 -2.73072  7.122350  5.61174 -0.369992 -3.337640
## 2 3.253030 -0.52109 11.01710  1.41546 -1.347250 -4.68588 -5.470220 -0.847408
## 3 0.240385  7.00778  3.52365 -1.66673 -1.377640 -7.34325 -6.123390 -7.495310
## 4 2.596990 -1.69915 11.56710  5.40263  8.225790  5.23127  2.497470 -1.212910
## 5 1.670470 -2.51541  3.88925  2.05927  0.447871 -2.91688  2.134180  2.218940
## 6 1.779740 -2.98839 -6.96636  1.62252 -5.128720  2.32212  4.330380  6.185210
##       PC267     PC268      PC269     PC270    PC271     PC272    PC273    PC274
## 1  0.613616   5.68206  2.3816700  1.196100 -8.22209 -4.639850  1.34193  7.72916
## 2 -5.298480   5.02221  0.3060520  0.410279 -1.12968 -3.351980 -1.24930 -1.67022
## 3  0.169219   6.72680  1.2057500 -3.196180  2.88484 -6.311270 -1.33891 -3.01090
## 4  1.645960   2.84261 -0.0118685 -0.859617 -3.34874  0.263232  2.95285  2.27743
## 5  0.729482   3.56651  0.1658270 -2.992770  1.79006  0.343371 -3.37505 -3.23617
## 6  2.918850 -10.56080 -5.3668600 -3.750360  7.57666  6.797100 -1.14310 -7.39161
##       PC275     PC276      PC277       PC278     PC279     PC280     PC281
## 1 -1.567000 -0.541626 -2.3688300  2.31419000  4.288540 -0.221416  2.291920
## 2  0.699928 -2.999690 -0.5837480 -2.83002000  1.545390  1.294580  1.069000
## 3  0.200851 -0.670598 -0.0464095  0.00465983  1.196600 -0.930999  3.447720
## 4 -2.834220  0.707821  3.9734100  3.71178000  2.108120 -0.984043  0.493455
## 5  1.411270  3.746540  0.4366800  1.48215000 -0.988513 -2.537130  1.234670
## 6 -3.469010  5.891030  3.8895800 -2.91172000  2.393080 -0.382463 -2.931400
##       PC282     PC283     PC284    PC285     PC286     PC287     PC288
## 1  1.003570 -1.175530 -1.824410  5.13454  2.423280 -3.702960 -1.284480
## 2  2.713420  0.303270 -0.383434 -1.57956  1.351580 -0.952356 -0.811361
## 3  3.383290 -1.118160 -2.668420 -1.04655  0.701324  0.058565 -0.758462
## 4  1.375720 -1.235870 -1.625840  5.27569  0.522691 -4.968060  0.197759
## 5  2.055850 -0.958846 -1.070320 -1.32984 -0.462819 -1.993750  1.911980
## 6 -0.886907 -0.966520  4.639050 -5.32699  2.417190  1.671340  2.014800
##       PC289     PC290     PC291     PC292     PC293      PC294     PC295
## 1  1.836550 -1.957090  0.530749  0.508176  0.636557 -1.4038200 -0.248397
## 2 -0.594592 -1.720770 -0.268210  0.288945  2.037530 -2.3017200  2.315090
## 3 -0.708405  2.430020 -2.235310 -0.810539 -0.489244 -0.5221430  1.293830
## 4  1.670870 -1.281180  1.018950 -2.859150 -1.072850 -0.0479585 -3.677910
## 5  0.898994 -0.508513  1.531620 -1.622450 -0.550698  2.2110100 -1.993760
## 6  2.775520 -0.158644  1.099160 -1.171090 -3.326430 -0.5605280  2.982730
##        PC296     PC297      PC298     PC299     PC300     PC301      PC302
## 1 -0.0467915 -0.654160 -2.0625000  0.240649 -0.588341  0.295810 -0.0316656
## 2  1.8139700  0.779074  1.5598200 -1.556570 -0.529166 -0.825035 -0.3991970
## 3  0.2042640 -1.983090  0.8547860  0.188940  0.952455 -1.293090  0.4155780
## 4 -1.0536000  1.223620 -0.1958010 -0.205820 -1.186100  0.895926  0.9182580
## 5 -0.5999970  0.708073 -0.0893639  0.430328  0.908237  0.879889 -1.3128300
## 6  0.5782380 -1.207890  0.8823800 -0.102105  1.834670  1.117060 -0.7175880
##        PC303      PC304     PC305     PC306     PC307     PC308      PC309
## 1 -0.1808410 -0.0681759  0.857804  0.903926  0.147160 -0.205308 -0.6859770
## 2  2.6869200  0.8234650 -0.634279 -0.204301  0.413023  0.684150 -0.7299410
## 3 -1.5163100 -0.6338990 -0.109502 -0.826637 -0.450889  0.641502  0.6849520
## 4 -0.0282513 -0.9098190  1.934460  0.766758 -1.479430 -0.584326 -0.7959710
## 5  0.7944070 -0.7262600 -0.943115 -1.985560  0.300072 -1.724090  0.0472941
## 6  0.7691120  2.1418600 -0.130876 -1.041520  0.215288 -1.160540  0.4151860
##       PC310     PC311     PC312     PC313     PC314      PC315      PC316
## 1 -0.571153 -1.214920 -1.229820 -0.739570 -0.612735 -0.3053200  0.2118910
## 2 -0.394852  1.681880 -1.950200  0.699761 -0.967610  1.7095700 -0.0358667
## 3 -0.625501 -0.696387 -2.302250  0.224089  0.215718 -0.5012200 -1.0750800
## 4  0.374613 -2.571250 -0.825077 -0.727066 -0.606653 -1.7053100 -2.0381000
## 5 -0.173505 -0.443516 -0.093964 -0.639347  0.511583  0.4607520 -0.3114920
## 6  0.134549  0.454952 -0.129294  0.774373 -1.011360 -0.0309106 -0.1467060
##        PC317       PC318     PC319      PC320     PC321     PC322      PC323
## 1 -1.2652000 -0.00506833  1.342590  0.8060470 -0.132721 -1.334280 -0.5696410
## 2 -0.4431830  0.71761600  0.373968 -0.3651000  1.493980  1.019030  0.1125780
## 3  0.9750040 -0.14353200 -0.372653 -0.3065110  1.438280 -0.585709 -0.0957389
## 4 -0.4231130  0.23121100  0.394054  0.4501970  1.270710 -0.501647 -0.2829260
## 5 -0.0134821 -0.05963560  0.595172 -0.6823740  0.412990  0.442514  0.1327450
## 6 -0.6708910  0.11538300 -1.251810 -0.0822363 -1.569170 -0.440656  1.0226600
##       PC324      PC325      PC326      PC327      PC328      PC329      PC330
## 1  0.438355  0.0776800  0.0495142 -0.0506694 -0.0149687 -0.0430816  0.1677090
## 2  0.282173  0.2536410  0.4902050 -0.3234080 -0.1826820 -0.3846510 -0.1355330
## 3  0.524582  0.1188800  0.0529997  0.2741200 -0.3504540 -0.1118490 -0.2768630
## 4 -0.380963  0.4693500 -0.0589738  0.5795380  0.3944940 -0.0561675  0.2645650
## 5  0.618017 -0.2050030 -0.2749630  0.0557564 -0.1506440 -0.1818220  0.3267600
## 6 -0.152357 -0.0620448  0.1298630 -0.3807810 -0.1834630  0.1412770  0.0468199
##         PC331 Individual  Pop_City Country Latitude Longitude Continent Year
## 1 1.57436e-06        282 Barcelona   Spain  41.3851    2.1734    Europe 2018
## 2 1.57436e-06        283 Barcelona   Spain  41.3851    2.1734    Europe 2018
## 3 1.57436e-06        280 Barcelona   Spain  41.3851    2.1734    Europe 2018
## 4 1.57436e-06        279 Barcelona   Spain  41.3851    2.1734    Europe 2018
## 5 1.57436e-06        286 Barcelona   Spain  41.3851    2.1734    Europe 2018
## 6 1.57436e-06        287 Barcelona   Spain  41.3851    2.1734    Europe 2018
##            Region   Subregion order order2 orderold
## 1 Southern Europe West Europe    16      8        8
## 2 Southern Europe West Europe    16      8        8
## 3 Southern Europe West Europe    16      8        8
## 4 Southern Europe West Europe    16      8        8
## 5 Southern Europe West Europe    16      8        8
## 6 Southern Europe West Europe    16      8        8

6.5.2 Create PCA plots for Iberian peninsual + US + native range

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_americas_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.5.3 PC1 and PC1 plot for Iberian peninsula + Americas + native range

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_americas_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.5.4 With individual pops named

sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US_2.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US_2.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US_2.rds"))
head(sampling_loc)
##     Pop_City           Location  Latitude  Longitude Continent Abbreviation
## 1 Berlin, NJ                USA  39.79081 -74.929100  Americas          BER
## 2 Palm Beach                USA  26.70560 -80.036400  Americas          PAL
## 3 Recife, PE             Brazil  -8.05882 -34.878100  Americas          REC
## 4   Gravatai             Brazil -29.93760 -50.990700  Americas          GRV
## 5   Penafiel Penafiel, Portugal  41.18555  -8.329371    Europe          POP
## 6      Loule    Loule, Portugal  37.09084  -8.092465    Europe          POL
##   Year          Region   Subregion order order2 orderold
## 1 2018   North America                 1     NA       75
## 2 2018   North America                 3     NA       77
## 3 2017   South America                 7     NA       81
## 4 2018   South America                 8     NA       82
## 5 2017 Southern Europe West Europe    11      3        3
## 6 2017 Southern Europe West Europe    12      4        4

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 39 Levels: BAR BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 39

Check the regions

unique(sampling_loc$Region)
## [1] "North America"   "South America"   "Southern Europe" "East Asia"      
## [5] "South Asia"      "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1       PC2     PC3     PC4      PC5      PC6     PC7
## 1        BAR 25.5499  2.001850 32.4190 7.67455 -25.4593 -4.67251 13.7777
## 2        BAR 25.4107  0.354687 31.4114 5.91305 -20.6103 -7.52188 21.6435
## 3        BAR 21.9004 -1.111830 31.3627 5.46695 -22.3339 -6.51403 19.6255
## 4        BAR 22.5679  1.277120 33.8145 6.21831 -25.4711 -6.89546 20.8575
## 5        BAR 24.4462 -1.403420 29.1115 6.30322 -19.0357 -5.08700 14.6359
## 6        BAR 25.3203  0.761056 34.6690 7.37388 -25.1849 -6.13724 17.6580
##       PC8       PC9      PC10     PC11    PC12     PC13    PC14    PC15
## 1 3.40690 -0.733271  -9.67442 -15.9058 18.0216  6.32654 5.98767 3.41399
## 2 5.84533 -2.214230 -12.85460 -19.9574 17.3894  6.11969 7.98925 1.07950
## 3 9.17413 -2.425720  -9.48653 -17.5797 18.6901  9.58027 5.83143 2.08171
## 4 8.02944 -3.644450 -12.77820 -19.6965 21.8319 10.70570 6.00047 5.04656
## 5 2.53529 -1.993170  -7.14319 -17.3004 14.1459  7.36393 5.16066 4.08381
## 6 8.71096 -1.058250 -12.28910 -19.4578 17.8304  7.96268 7.49455 2.73509
##       PC16     PC17    PC18      PC19      PC20      PC21      PC22      PC23
## 1 -7.16208  7.71709 4.41331 -2.754290  1.854080  2.129810  0.884887  1.293300
## 2 -6.41903  7.30044 5.32912  3.931020 -1.013790 -0.234323 -8.631420 -2.862340
## 3 -3.89952 11.34300 4.84956  2.312330 -3.679250 -2.301880 -6.299350 -0.498285
## 4 -4.11928 10.77560 8.72978  1.012840 -0.646801 -2.926890 -7.660640 -0.725622
## 5 -3.15545  6.72153 4.27435  0.619417 -0.920164 -3.943620 -5.897880 -3.275340
## 6 -9.46860 11.47150 4.60506 -2.486380  0.665044  3.262860 -2.738350  2.407820
##       PC24     PC25     PC26      PC27       PC28      PC29      PC30      PC31
## 1 -2.23555 3.980410 0.479427 -0.130636  0.0865897 -2.361130  0.557199  0.316978
## 2  2.34082 2.598980 0.134286  2.492170  4.3267900 -4.631260  1.770090 -0.939229
## 3  5.09459 0.749207 0.836163 -1.730430 -0.4442760 -2.515980 -0.942403  4.041180
## 4  3.40329 2.569730 4.961780 -4.176320 -0.1042140 -0.131276 -0.999407 -1.027160
## 5  1.23383 1.401020 2.345880  4.706740  3.5340900 -3.955960 -2.782230  2.525790
## 6  4.66316 4.781560 0.827205 -0.737351 -1.4653200 -3.554900  3.813600 -0.341777
##         PC32     PC33     PC34        PC35      PC36     PC37      PC38
## 1 -0.4444750 -3.15818 -2.83666  4.82432000 -0.997399  1.21471 -5.746860
## 2  3.5647700  1.54800 -3.88493  9.35334000 -1.017370 -6.12165  6.997830
## 3 -2.4151800 -1.41287  4.46832 -7.77748000 -1.808960  8.18154 -0.910979
## 4  0.0760186  3.24226  2.29609 -2.64337000  4.722200 -1.96688 -1.398860
## 5 -1.2545300  2.59171 -4.77755  0.00356112  5.917210 -4.51056 -3.554590
## 6 -1.9315000  2.22768 -1.22077 -2.63133000  0.270552  6.88953 -4.599620
##         PC39       PC40      PC41      PC42       PC43     PC44      PC45
## 1 -0.0614424 -10.545000  7.883870  6.857250 -3.2807000  2.47440 16.497700
## 2 -3.3119900   0.578622  2.470320 -6.460780 -4.1464900  2.14022 -7.894510
## 3  4.8478600   1.088610 -3.357980 -0.298669  0.0652313 -1.38465 -0.620771
## 4 -0.8465660   9.940250 -8.551070 -4.867380  5.7667200 -7.29192 -7.406230
## 5 -3.6516100  -4.192260 10.120500 -1.745520 -0.0524046  4.05158 12.631500
## 6  0.9997290  -2.663760  0.210486  3.010410 -3.9270600  3.11524  3.306900
##       PC46      PC47     PC48     PC49      PC50      PC51      PC52       PC53
## 1  8.22976   6.59451  8.11986  5.01540  -4.40843 -0.740971 -15.62990   9.878280
## 2  3.88213  -6.63139 -3.31186 -9.77056   8.51794 -2.449940   7.63387   4.434340
## 3 -8.34699 -11.30220 -7.68358 12.71820 -20.46040 -2.483860 -21.09400 -21.488000
## 4 -7.00947   6.08540 -7.22786 -7.66642  11.90510  2.748040  16.59100 -10.143900
## 5  5.70632  20.08740  3.50690 -4.87685  18.25830  5.788220  13.66970  16.963500
## 6  5.80395  -7.31314  2.43073 11.88560 -19.00440  0.178450 -13.99530   0.764454
##        PC54       PC55      PC56      PC57     PC58       PC59     PC60
## 1  -5.57121 -15.027800 -13.90860 -4.094810  4.21689 -10.382200 -4.29901
## 2   3.36386  -4.893800   2.43052  4.229120 -5.20076   1.254800 -8.31485
## 3   3.95256   6.789440   1.51764 -5.220900  8.97183   0.382508  7.51565
## 4   5.02958  18.559000  15.71960  3.405140 -6.72767   7.536590  9.78866
## 5 -15.53990   0.428366 -12.44660 -2.525550 -6.14423  -4.232770  6.26329
## 6   2.29198 -10.959400  -3.82909  0.495758  5.25124   3.543490 -6.27881
##          PC61       PC62      PC63      PC64       PC65       PC66      PC67
## 1 -1.09797000  -4.804900 -0.156622  0.683491  -2.260210   4.234910 -3.649060
## 2  0.00490617  10.205300 -0.668617 -8.417400  -1.931710   3.357220 -5.219050
## 3  1.20415000  -0.623453  0.974949 10.128200   4.281520 -10.625900  9.376110
## 4  3.38923000  -4.474070 -3.546520 -1.106340  -0.252152  -3.496220  1.515490
## 5 -1.17010000 -16.055000  0.699717 -2.160300   2.908200   2.509440 -0.379699
## 6  0.01844000   5.739770 -9.486850  2.533710 -10.111700  -0.978215 -6.956280
##        PC68      PC69      PC70      PC71      PC72      PC73      PC74
## 1 -1.574830 -5.243250 -1.403580 -12.63960  1.356470  -8.12799  0.859371
## 2 -0.278632  0.894293 -3.828950   5.62895 -1.293390   1.69540  4.779710
## 3  8.166690 -4.691220  3.184360  -9.13291 -0.734180   4.67734  2.345690
## 4  0.836239  8.973690 -6.029360   5.15928 -2.319940   4.61097 -1.591360
## 5 -2.314620 -3.092170 10.263800   3.76500 -4.196360 -18.39850 -3.893150
## 6  8.467670 -3.522660  0.544784 -13.40720 -0.114845   1.46456 12.050100
##        PC75       PC76     PC77     PC78     PC79     PC80     PC81      PC82
## 1  6.572720  -1.970600 -6.48878  2.12883 -8.44218  2.83437 10.09980 -6.658690
## 2  0.289025 -10.742600  9.31079  2.62550 -5.67173  0.65423 -9.82558  6.597310
## 3 -4.700500   0.240791 -9.46227 -1.55213  2.30165  5.40084  1.85246 -0.882662
## 4 -3.918090   4.068350 -4.40486 -2.23271 12.17960 -2.56363 -9.31355  0.260666
## 5  4.600820   9.808770  4.18361  1.28728 -7.80091 -5.49614 10.93040 -3.438530
## 6 -5.969230 -10.239100 -4.05508  2.52897 -6.26424 -6.24197  2.27204 -0.564218
##       PC83       PC84      PC85     PC86     PC87      PC88      PC89      PC90
## 1 -2.86887  14.001600  0.957331 -1.87941  4.64860  0.657698  0.399883  2.022860
## 2 -2.41667   0.050327  2.596270  1.67471 -2.48395 -1.396320 -4.257030 -2.291260
## 3  2.37558  -2.717030  0.846448  3.65477 -3.07108 -2.243730  3.539840 -0.099632
## 4  3.51192 -10.564200 -0.569494 -2.68905 -2.09208  5.139930  6.494860  5.454050
## 5 -2.26074  13.791700  3.296180 -5.23505  8.30755 -4.069930 -6.242270 -4.136490
## 6 -2.48147  10.155800  0.622089  4.60028 -1.74139  5.396000  2.946920 -2.739450
##        PC91      PC92      PC93      PC94      PC95     PC96      PC97
## 1 -7.621980  0.347251 -7.679630 -4.570580   6.94600  2.55569 -6.281100
## 2  6.124300 -6.053970  5.249970  9.215560   1.07701 -3.67124 -7.650710
## 3 -0.821424  6.222880 -5.285170 -1.756490  -2.14933 -1.85051  0.284107
## 4  0.316467 -1.141910 -3.056630 -1.073980 -10.11590  3.00487  5.355180
## 5  3.806260  0.314570  0.591955 -9.671800  14.58320  2.33464 -1.780890
## 6 -3.593550 -4.794020 -7.208490 -0.857269   1.25426 -2.20382 -2.009490
##        PC98      PC99     PC100     PC101     PC102     PC103     PC104
## 1 -10.40770 -2.110000 -9.424470 -1.839250  1.535390 -5.510170 -9.217160
## 2   1.88745 -0.834114 -2.570220  0.206347  4.293290  0.302222  1.592700
## 3   1.58134 -1.053580  6.435840  5.676030 -3.411820 -0.304258 -0.429984
## 4   8.50590  1.367650  0.608194  1.623170 -1.206740  7.056930  3.993230
## 5  -6.29561 -4.134710 -1.820040 -6.111940  0.997718  0.208238  4.110780
## 6  -2.58204  3.409730  0.152236  1.872320 -0.201032 -4.711020 -6.356940
##       PC105    PC106     PC107     PC108    PC109     PC110    PC111     PC112
## 1  6.079390  1.64507  5.043130 -1.280310 -5.12226  2.400870 -2.93166 -2.596930
## 2 -6.858940  4.49297 10.819200  1.935290  1.18597 -3.354460 -3.12687  5.176590
## 3  1.402610  5.83262 -8.318620 -0.867838  4.82371 -2.783900  0.98931 -0.873792
## 4  0.540892 -8.13902  2.633320  2.897030 -2.00770  4.063370  5.14743 -4.799910
## 5 -2.495470 -2.72289 -0.883392 -1.856300 -3.07880 -1.573520 -5.40047  3.153300
## 6  2.410270  3.76763  0.650305  5.733720 -1.65211 -0.793212 -1.72445 -6.884940
##        PC113    PC114     PC115     PC116     PC117     PC118     PC119
## 1  2.2578800  3.88597  2.867630  0.539158 -0.845930 -5.403780  2.784410
## 2 -4.3606400 -0.92686 -7.133760 -4.580270 -3.141370 -2.555780 -6.057470
## 3 -0.0760097  4.41340  2.063490  4.869730  1.861820  5.288130 -0.360741
## 4 -2.7829100 -2.09383  1.375930 -2.798820 -0.927025 -0.118288  8.700330
## 5  7.6067200  3.72398  2.286540  3.436420  1.227490  0.349492  0.649339
## 6 -0.9041910 -2.10562  0.943164 -0.286257 -4.915870 -1.528040  2.788780
##      PC120    PC121     PC122    PC123    PC124     PC125     PC126    PC127
## 1 -4.91608  2.93566 -8.418820  3.80704 -1.52167 -5.461930  2.234070 -2.08119
## 2 -5.82860  2.04339  7.237420  1.77367  5.72487  4.807260  0.455513 -4.55348
## 3  1.93994 -6.71502  5.527480 -1.24756 -6.20322 -2.739320 -1.555350  2.43503
## 4 -4.21144 -2.04098 -5.836880 -6.76454 -2.89093  0.808653  0.486359  3.37248
## 5  6.23956 -3.65780  9.455560  1.56276  2.35529 -6.507030 -3.885540 -1.85520
## 6 -6.83929  3.39949  0.274695 -3.51281 -1.19905  5.340580 -0.898033 -5.43983
##       PC128     PC129     PC130     PC131     PC132      PC133     PC134
## 1  -2.68327 -2.549630  9.401090  0.683652 -1.271010 -11.416600 -1.922070
## 2   9.75043 -0.917970 -0.372633  1.562830 -1.881060   4.285940  4.032880
## 3   2.38452 -5.374380 -3.196170 -2.384930  2.686380   4.526360 -2.830820
## 4 -11.87540  4.469200 -7.035340 -3.212530 -3.742870   8.108840  2.652020
## 5   4.62774  0.716657  1.085520  3.138260  0.723081  -5.909650  0.225585
## 6  -2.55966 -3.140090  3.077420 -0.500853  3.364680   0.357091  5.086970
##      PC135    PC136     PC137    PC138    PC139     PC140     PC141     PC142
## 1 -4.87730  2.26513  0.262335  4.62478  5.99020  4.339540  0.189673 -3.939540
## 2  8.17511  1.93598  5.514280  1.03692 -1.33778 -6.677280  1.741950  0.921766
## 3  4.24522 -5.15313  0.201977 -6.31506 -1.37553  1.129400 -5.531570  3.147510
## 4 -5.15336  2.21695 -2.338480 -5.61432 -8.98571 -1.644590  4.369480 -1.193880
## 5 -2.41895  3.90826  3.841930  4.90093 14.33500  0.260579 -5.722460  7.555040
## 6  2.18485 -3.69796  0.569709 -1.83564 -6.22962  5.775860 -4.806010 -6.111940
##       PC143     PC144     PC145     PC146     PC147    PC148     PC149    PC150
## 1 -0.699479 -2.504360 -2.254420 -3.395470   4.77453 -3.15509 -1.713990  3.44239
## 2 -2.513140 -0.844755 -3.618760 -3.942080   5.65364 -5.38484 -3.114800 -4.41353
## 3  5.840610 -1.219760 -8.340430  5.265660   3.93128  3.53840  5.863480  7.40169
## 4  2.002100 -1.773520  5.916870 -2.958120 -11.15000 -1.58413 -5.806020 -5.48468
## 5 -5.756550  4.389890  4.857280  6.174350  -4.38367 -1.28658  0.225397 -2.82186
## 6  4.348170  0.796501 -0.205014 -0.827709  -4.34729 -5.21046 -1.004390  4.42634
##       PC151    PC152     PC153     PC154    PC155    PC156     PC157     PC158
## 1  2.118480  4.28212 -0.317366 -1.457550  1.15876 -1.83845  0.677561  2.787140
## 2 -4.782600  4.04689 -0.657632  3.414950  3.59248  1.42896  0.855985  0.960586
## 3 -0.398366 -1.25384 -2.677050  0.337193 -4.41156 -3.83077 -0.899710  0.143709
## 4 -4.051750  3.49544  9.373580 -3.702670  2.50726 -3.59417 -3.470970 -2.842710
## 5  0.403745  1.09411 -5.127360 -6.036350 -5.27839  7.88031  0.981945 -4.104420
## 6  2.459710  1.82108  3.974530 -0.626101  2.53644 -6.06100 -0.463329  2.182540
##      PC159     PC160     PC161    PC162    PC163    PC164     PC165    PC166
## 1  2.61953  2.776970  6.499190 -3.17932 -1.26286 -5.81216 -3.177150 -1.86669
## 2 -2.04183 -3.528650 -5.698820 -4.64500 -4.59376 -5.62974  2.897230 -1.27826
## 3 -4.13686  0.359589  2.721880  2.89826  2.82899  7.24960  0.378805 -6.48326
## 4  2.79463 -1.328860 -0.184743  1.14260  1.50184  4.79097 -4.914690  8.68172
## 5  1.95661 -2.774440  1.031620 -0.31542  5.01982 -1.95284 -2.344740  8.01955
## 6 -2.27209 -2.122670 -2.977810 -1.96589  3.56997  3.67382  7.778470 -1.85858
##        PC167     PC168     PC169     PC170      PC171     PC172    PC173
## 1 -0.0803253  2.848890 -2.162710  4.967950 -3.9800100 -4.861580 -1.59772
## 2 -3.7661300 -3.614570 -4.534350 -0.843756 -5.4733700  7.792640 -1.93847
## 3 -0.6931030 -1.764910 -1.017960 -3.551750 -2.6722700 -0.845494 -4.76455
## 4  3.4457900 -4.300140  5.789650 -0.429008  0.0647389 -7.637510 -1.81966
## 5 -0.7328930  0.499075 -0.141274  0.987371 14.8210000  5.370970  1.26637
## 6  4.7712000 -0.928789 -2.991940 -4.036390 -5.7398300 -5.186370 -3.25269
##      PC174     PC175     PC176     PC177     PC178     PC179     PC180
## 1  9.62203  3.794880  7.310290  1.446480  3.169850  2.729350 -5.314270
## 2 -2.25811 14.894800 -5.213880 -4.870530 -2.762820  4.817930  3.516820
## 3  5.99955  2.589690 -1.611290  7.348660 -0.456571  2.428020  4.821800
## 4 -7.29503 -9.533150  0.415911 -8.096380  2.315460 -2.030240 -4.813670
## 5 -4.08710 -4.051960  2.816590  0.522548  5.103060  0.550644 -0.870163
## 6 -4.15292 -0.687135 -0.908193 -0.298223  2.353910  2.230670  3.742080
##        PC181    PC182     PC183      PC184     PC185    PC186     PC187
## 1  3.4773000  4.92234 -0.658278  4.7902500  0.790756  5.81346 -1.153170
## 2  0.0253924 -6.73912 -0.494408  1.9940900 -4.615020  6.09937 -0.101715
## 3 -5.7484900 -2.87637  2.330840 -2.9863900  2.284260  5.20689 -2.454970
## 4 10.4513000  7.54592  3.610410  6.0622700  3.310300 -6.91275  1.656040
## 5 -1.4731600  1.22017 -6.339070 -7.6392100 -4.212780  1.59322  0.115176
## 6 -1.2248000 -2.79284 -2.296190  0.0286201 -3.590650  3.76164  1.137250
##         PC188     PC189    PC190     PC191     PC192      PC193    PC194
## 1   4.5866100  0.486935  3.98583   1.18599 -0.239188 -7.5906200 -1.54424
## 2 -10.3407000 -2.671190  4.26308  -3.43792  2.857570  1.0492400 -3.08940
## 3   0.1671920 -6.557360 -9.36124  10.18680  1.382550  0.0706454 -4.30834
## 4   4.4596000  1.430970  3.55802 -11.40820  0.961035 -0.1869930 10.67490
## 5  -7.9391000 -3.556150 -2.03019  -0.64403 -6.336980 12.7351000  2.46297
## 6   0.0759437  3.128770  2.74974  -8.75257 -3.094980 -6.5699700  4.72646
##       PC195     PC196    PC197     PC198     PC199       PC200     PC201
## 1  0.910948 -8.042340  5.23727  8.172020   1.74364   7.4632400  10.90750
## 2 -5.426750  0.130371  7.72826 -6.550310 -11.48980 -13.9436000  -8.64064
## 3  6.408280  4.563530 -4.61863 -6.578520   5.92747   0.0228253 -10.26530
## 4  1.673040  5.761970 -4.43693  0.321186   1.10595  -4.3009700   8.66860
## 5  1.472540 -5.483900 -6.46014  5.276940  -3.83060  -4.6839400   1.42336
## 6  3.555810  6.871120 -4.15241 -8.280110   1.35386   4.7200800  -9.43587
##        PC202     PC203    PC204     PC205     PC206     PC207     PC208
## 1 -2.2669100  3.741520 -6.90213  1.521190  0.548047   1.04816  7.438350
## 2  1.6462600  9.763130 -2.57959 -2.844000 -1.215910  -2.81616 -3.145650
## 3 -0.0215309 -6.618470  6.54731  9.394450 -3.523440 -10.66750  6.256250
## 4 -6.2581700 -0.318148 -5.49795 -6.739860  8.748370   5.30643 -0.278222
## 5 -1.1527600  4.195740  3.75332  8.709440  0.724445   1.80931 -4.247090
## 6 -1.9675900 -2.659990  2.69489 -0.196236 -1.345850  -1.33835 -3.777190
##        PC209     PC210     PC211    PC212     PC213     PC214     PC215
## 1   1.079830  0.804993   5.95026 -1.61042  3.547410 -0.497208  -2.09902
## 2  -3.828520 -3.319220  -3.68087  3.30502  0.411716  1.725200  -9.77083
## 3   3.878940  6.145040   6.15633  4.76610  2.528460  1.723470  11.92680
## 4  -3.133370 -0.159137  -1.99040  1.98730 -3.919200 -1.113520 -12.08330
## 5 -10.947600  2.338610 -11.36970  1.54996 -2.101040  2.795130   6.68741
## 6   0.730206  1.055790   4.48507 -3.09285 -3.694160  2.946190  -3.76150
##       PC216     PC217    PC218    PC219     PC220     PC221      PC222
## 1  -1.93799  2.137280  1.49136 -6.35979 -1.999650  6.460560 -10.585700
## 2  10.25870 -0.681566 -8.56728 -2.77335  0.664778  7.269180  -5.625950
## 3  -1.91891 -4.036690 -5.57953  4.47943 -3.644230 -1.642510   0.177161
## 4   0.40950 -2.822450  1.48543 -6.64756  3.153860 -0.323252   1.453790
## 5 -11.59720 11.862400  5.37617  6.33545 -6.907660 -4.249690  -3.328230
## 6  -1.94337 -4.715090 -1.19449  3.98731  7.462940 -8.265040   7.105690
##       PC223    PC224    PC225      PC226      PC227      PC228     PC229
## 1   6.26030  5.75470 -6.84284  11.715700 -5.6183400  0.2430480 -0.704013
## 2  17.91000 -7.41692 23.08070   0.976779 -5.1448300 -2.3510100 -0.273485
## 3 -14.50860  6.04174 -3.54294   1.507420  2.9864500 -0.8730820  4.071330
## 4  -2.47360 -5.11154 -2.27466  -2.801570 -2.3035100 -0.0670175 -9.115460
## 5  -1.17638  2.57996 -9.42025  -1.487250  6.2886600  3.8361400  0.896267
## 6  -3.81264 -3.02251 -3.97208 -11.323300  0.0969562  5.2164200 -1.921160
##      PC230     PC231     PC232     PC233     PC234     PC235    PC236     PC237
## 1 -6.11467 -2.251420  6.120510 -2.641430  1.507590  -4.00376 -6.11933 -0.933163
## 2 -4.48263  1.855160  5.160780  5.599340 -4.713290   6.16179 -2.42093 -2.878430
## 3  7.98041 -0.923137 -0.136223  0.705681 -6.179890 -10.17500 -1.47485  3.840300
## 4  0.81340 -1.179330  0.886945 -2.777520  4.520280  -1.01872  7.61043 13.094500
## 5  4.38866  3.506720 -8.013210 -2.897140 -1.386990   1.37174  6.54433  3.476010
## 6 -2.58047 -1.612530  6.915120 -4.707690 -0.449379   5.55076 10.31380  0.237316
##       PC238     PC239    PC240      PC241    PC242     PC243      PC244
## 1  21.10460 -15.26040  2.19886 -0.0960591 -3.26182 -18.25860  -4.793180
## 2 -14.65680  11.68660 -2.92870 -6.7929100 -3.74096  -3.30562   0.578432
## 3 -12.73490   1.53312  8.05155  1.6704700 -9.82794  -3.79779  -5.898550
## 4   6.76285   6.71659 -2.03321  7.6123200  9.25779   6.91597 -10.802400
## 5 -17.45570   5.53179  1.85606  0.4529880  3.55132  21.33230  -6.126410
## 6  -2.81911   6.30187 -3.38643 11.3769000 26.88920   5.01527   6.718160
##        PC245     PC246      PC247    PC248      PC249     PC250     PC251
## 1   9.825920  0.368498 -10.421100 -3.01939   4.934450  5.364350   3.21276
## 2   0.249513 -3.860660  -6.924750 -7.77055   6.010400  1.173970   9.52594
## 3  -1.940680 -1.843600   0.517222 11.53290  12.955700  8.033140  23.63580
## 4  13.227400 -5.330850  -1.493860  7.58503  -3.396850  8.558150  14.18900
## 5 -12.008000  2.958860   3.197090  7.69150   0.490028  5.073940   4.34951
## 6 -14.440200 -1.294670   0.980852  3.58781 -12.775300 -0.912472 -22.90500
##      PC252     PC253      PC254     PC255    PC256      PC257     PC258
## 1 18.37280   6.08839  -5.178480  -1.77009 -1.49813   0.215559 13.878100
## 2 -1.37248   1.89693 -18.117400   3.58664 -9.25055 -17.999100  9.275970
## 3 -5.39199 -10.17820  -9.378100 -11.21190  4.04999  -5.659700  7.364760
## 4 13.28860   0.77435  -0.941677 -10.06840  3.86081   4.607560  6.790750
## 5 -3.71363  -3.86821  -8.100050  -7.65747 -4.77171   6.838140  0.779761
## 6  5.60337  10.07590 -14.518000   1.70536 -8.48897   6.001850 -2.653630
##      PC259    PC260    PC261    PC262     PC263    PC264     PC265     PC266
## 1 1.366520 -1.67545 12.12620 -2.73072  7.122350  5.61174 -0.369992 -3.337640
## 2 3.253030 -0.52109 11.01710  1.41546 -1.347250 -4.68588 -5.470220 -0.847408
## 3 0.240385  7.00778  3.52365 -1.66673 -1.377640 -7.34325 -6.123390 -7.495310
## 4 2.596990 -1.69915 11.56710  5.40263  8.225790  5.23127  2.497470 -1.212910
## 5 1.670470 -2.51541  3.88925  2.05927  0.447871 -2.91688  2.134180  2.218940
## 6 1.779740 -2.98839 -6.96636  1.62252 -5.128720  2.32212  4.330380  6.185210
##       PC267     PC268      PC269     PC270    PC271     PC272    PC273    PC274
## 1  0.613616   5.68206  2.3816700  1.196100 -8.22209 -4.639850  1.34193  7.72916
## 2 -5.298480   5.02221  0.3060520  0.410279 -1.12968 -3.351980 -1.24930 -1.67022
## 3  0.169219   6.72680  1.2057500 -3.196180  2.88484 -6.311270 -1.33891 -3.01090
## 4  1.645960   2.84261 -0.0118685 -0.859617 -3.34874  0.263232  2.95285  2.27743
## 5  0.729482   3.56651  0.1658270 -2.992770  1.79006  0.343371 -3.37505 -3.23617
## 6  2.918850 -10.56080 -5.3668600 -3.750360  7.57666  6.797100 -1.14310 -7.39161
##       PC275     PC276      PC277       PC278     PC279     PC280     PC281
## 1 -1.567000 -0.541626 -2.3688300  2.31419000  4.288540 -0.221416  2.291920
## 2  0.699928 -2.999690 -0.5837480 -2.83002000  1.545390  1.294580  1.069000
## 3  0.200851 -0.670598 -0.0464095  0.00465983  1.196600 -0.930999  3.447720
## 4 -2.834220  0.707821  3.9734100  3.71178000  2.108120 -0.984043  0.493455
## 5  1.411270  3.746540  0.4366800  1.48215000 -0.988513 -2.537130  1.234670
## 6 -3.469010  5.891030  3.8895800 -2.91172000  2.393080 -0.382463 -2.931400
##       PC282     PC283     PC284    PC285     PC286     PC287     PC288
## 1  1.003570 -1.175530 -1.824410  5.13454  2.423280 -3.702960 -1.284480
## 2  2.713420  0.303270 -0.383434 -1.57956  1.351580 -0.952356 -0.811361
## 3  3.383290 -1.118160 -2.668420 -1.04655  0.701324  0.058565 -0.758462
## 4  1.375720 -1.235870 -1.625840  5.27569  0.522691 -4.968060  0.197759
## 5  2.055850 -0.958846 -1.070320 -1.32984 -0.462819 -1.993750  1.911980
## 6 -0.886907 -0.966520  4.639050 -5.32699  2.417190  1.671340  2.014800
##       PC289     PC290     PC291     PC292     PC293      PC294     PC295
## 1  1.836550 -1.957090  0.530749  0.508176  0.636557 -1.4038200 -0.248397
## 2 -0.594592 -1.720770 -0.268210  0.288945  2.037530 -2.3017200  2.315090
## 3 -0.708405  2.430020 -2.235310 -0.810539 -0.489244 -0.5221430  1.293830
## 4  1.670870 -1.281180  1.018950 -2.859150 -1.072850 -0.0479585 -3.677910
## 5  0.898994 -0.508513  1.531620 -1.622450 -0.550698  2.2110100 -1.993760
## 6  2.775520 -0.158644  1.099160 -1.171090 -3.326430 -0.5605280  2.982730
##        PC296     PC297      PC298     PC299     PC300     PC301      PC302
## 1 -0.0467915 -0.654160 -2.0625000  0.240649 -0.588341  0.295810 -0.0316656
## 2  1.8139700  0.779074  1.5598200 -1.556570 -0.529166 -0.825035 -0.3991970
## 3  0.2042640 -1.983090  0.8547860  0.188940  0.952455 -1.293090  0.4155780
## 4 -1.0536000  1.223620 -0.1958010 -0.205820 -1.186100  0.895926  0.9182580
## 5 -0.5999970  0.708073 -0.0893639  0.430328  0.908237  0.879889 -1.3128300
## 6  0.5782380 -1.207890  0.8823800 -0.102105  1.834670  1.117060 -0.7175880
##        PC303      PC304     PC305     PC306     PC307     PC308      PC309
## 1 -0.1808410 -0.0681759  0.857804  0.903926  0.147160 -0.205308 -0.6859770
## 2  2.6869200  0.8234650 -0.634279 -0.204301  0.413023  0.684150 -0.7299410
## 3 -1.5163100 -0.6338990 -0.109502 -0.826637 -0.450889  0.641502  0.6849520
## 4 -0.0282513 -0.9098190  1.934460  0.766758 -1.479430 -0.584326 -0.7959710
## 5  0.7944070 -0.7262600 -0.943115 -1.985560  0.300072 -1.724090  0.0472941
## 6  0.7691120  2.1418600 -0.130876 -1.041520  0.215288 -1.160540  0.4151860
##       PC310     PC311     PC312     PC313     PC314      PC315      PC316
## 1 -0.571153 -1.214920 -1.229820 -0.739570 -0.612735 -0.3053200  0.2118910
## 2 -0.394852  1.681880 -1.950200  0.699761 -0.967610  1.7095700 -0.0358667
## 3 -0.625501 -0.696387 -2.302250  0.224089  0.215718 -0.5012200 -1.0750800
## 4  0.374613 -2.571250 -0.825077 -0.727066 -0.606653 -1.7053100 -2.0381000
## 5 -0.173505 -0.443516 -0.093964 -0.639347  0.511583  0.4607520 -0.3114920
## 6  0.134549  0.454952 -0.129294  0.774373 -1.011360 -0.0309106 -0.1467060
##        PC317       PC318     PC319      PC320     PC321     PC322      PC323
## 1 -1.2652000 -0.00506833  1.342590  0.8060470 -0.132721 -1.334280 -0.5696410
## 2 -0.4431830  0.71761600  0.373968 -0.3651000  1.493980  1.019030  0.1125780
## 3  0.9750040 -0.14353200 -0.372653 -0.3065110  1.438280 -0.585709 -0.0957389
## 4 -0.4231130  0.23121100  0.394054  0.4501970  1.270710 -0.501647 -0.2829260
## 5 -0.0134821 -0.05963560  0.595172 -0.6823740  0.412990  0.442514  0.1327450
## 6 -0.6708910  0.11538300 -1.251810 -0.0822363 -1.569170 -0.440656  1.0226600
##       PC324      PC325      PC326      PC327      PC328      PC329      PC330
## 1  0.438355  0.0776800  0.0495142 -0.0506694 -0.0149687 -0.0430816  0.1677090
## 2  0.282173  0.2536410  0.4902050 -0.3234080 -0.1826820 -0.3846510 -0.1355330
## 3  0.524582  0.1188800  0.0529997  0.2741200 -0.3504540 -0.1118490 -0.2768630
## 4 -0.380963  0.4693500 -0.0589738  0.5795380  0.3944940 -0.0561675  0.2645650
## 5  0.618017 -0.2050030 -0.2749630  0.0557564 -0.1506440 -0.1818220  0.3267600
## 6 -0.152357 -0.0620448  0.1298630 -0.3807810 -0.1834630  0.1412770  0.0468199
##         PC331 Individual  Pop_City         Location Latitude Longitude
## 1 1.57436e-06        282 Barcelona Barcelona, Spain  41.3851    2.1734
## 2 1.57436e-06        283 Barcelona Barcelona, Spain  41.3851    2.1734
## 3 1.57436e-06        280 Barcelona Barcelona, Spain  41.3851    2.1734
## 4 1.57436e-06        279 Barcelona Barcelona, Spain  41.3851    2.1734
## 5 1.57436e-06        286 Barcelona Barcelona, Spain  41.3851    2.1734
## 6 1.57436e-06        287 Barcelona Barcelona, Spain  41.3851    2.1734
##   Continent Year          Region   Subregion order order2 orderold
## 1    Europe 2018 Southern Europe West Europe    16      8        8
## 2    Europe 2018 Southern Europe West Europe    16      8        8
## 3    Europe 2018 Southern Europe West Europe    16      8        8
## 4    Europe 2018 Southern Europe West Europe    16      8        8
## 5    Europe 2018 Southern Europe West Europe    16      8        8
## 6    Europe 2018 Southern Europe West Europe    16      8        8

6.5.5 Create PCA plots for Iberian peninsual + US + native range

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_americas_pops_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 & PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_americas_pops_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

6.6 PCA for Iberian Peninsula + Turkey SNP Set 3 (MAF 1%, R2<0.01)

6.6.1 Import the data for SNP Set 3 subset for native_iberia_turkey

genotype <- here(
   "euro_global/output/neuroadmixture/native_turkey_iberia2.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 301
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 301
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BAR BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM MAT 
##  12  12  12  12   2  12   4   7  12  11   4   2   6  12  11   6   4   4   9  12 
## OKI POL POP QNC SON SPM SPS SSK SUF SUU TAI TUA TUH UTS YUN 
##  12   2  12  11   3   5   8  12   6   6   7   9  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   292
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   292
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.removed file, for more informations.
## 
## 
##  - number of detected individuals:   292
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.lfmm"

PCA for MAF 1% r2<0.01 snp set of native_turkey_iberia2

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          292
##         -L (number of loci)                 22537
##         -K (number of principal components) 292
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            native_turkey_iberia2.pca/ 
## input file:                      native_turkey_iberia2.lfmm 
## eigenvalue file:                 native_turkey_iberia2.eigenvalues 
## eigenvector file:                native_turkey_iberia2.eigenvectors 
## standard deviation file:         native_turkey_iberia2.sdev 
## projection file:                 native_turkey_iberia2.projections 
## pcaProject file:                   native_turkey_iberia2.pcaProject 
## number of individuals:           292 
## number of loci:                  22537 
## number of principal components:  292 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          292
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey.rds"))
head(sampling_loc)
##    Pop_City           Location Latitude Longitude Continent Abbreviation Year
## 1  Penafiel Penafiel, Portugal 41.18555 -8.329371    Europe          POP 2017
## 2     Loule    Loule, Portugal 37.09084 -8.092465    Europe          POL 2017
## 3 San Roque   San Roque, Spain 36.17042 -5.371530    Europe          SPS 2017
## 4 Barcelona   Barcelona, Spain 41.38510  2.173400    Europe          BAR 2018
## 5   Magaluf     Magaluf, Spain 39.50679  2.530729    Europe          SPM 2017
## 6    Aliaga     Aliaga, Turkey 38.76390 26.944800    Europe          TUA 2019
##            Region   Subregion order order2 orderold
## 1 Southern Europe West Europe    11      3        3
## 2 Southern Europe West Europe    12      4        4
## 3 Southern Europe West Europe    14      6        6
## 4 Southern Europe West Europe    16      8        8
## 5 Southern Europe West Europe    17      9        9
## 6  Eastern Europe East Europe    40     32       32

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 35 Levels: BAR BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 35

Check the regions

unique(sampling_loc$Region)
## [1] "Southern Europe" "Eastern Europe"  "East Asia"       "South Asia"     
## [5] "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1          PC2      PC3     PC4     PC5      PC6      PC7
## 1        BAR 32.1490 -3.57024e-05 -25.5612 7.45879 28.0198 -23.5221 -7.14769
## 2        BAR 29.7010  8.26655e-01 -29.7475 6.83475 32.8377 -22.6346 -6.74459
## 3        BAR 31.8548  1.57682e+00 -24.0926 6.63071 28.6902 -10.3283 -5.37942
## 4        BAR 30.6260 -1.78255e+00 -21.3461 7.65705 24.6081 -14.7701 -5.28804
## 5        BAR 32.7104  3.41229e-01 -27.6710 8.00618 31.9859 -18.7968 -5.40058
## 6        BAR 31.2687 -2.77012e-01 -24.7553 6.53760 27.1167 -18.1478 -5.27905
##         PC8     PC9      PC10      PC11    PC12    PC13      PC14      PC15
## 1 -2.833060 2.94707  -6.67679  0.580087 6.75390 5.13864  0.642729  -7.98961
## 2 -5.341140 1.51015 -10.79810  1.184940 9.68668 2.47137 -3.023580 -10.83150
## 3 -0.786157 3.20566  -5.48401  4.546320 3.12622 2.25593 -2.696430  -4.70747
## 4  0.267839 1.94401  -7.51096 -0.489323 7.30012 1.47254 -0.811141  -7.49421
## 5 -5.396440 3.35203  -7.14974  3.565780 4.36574 5.12720 -6.290740  -6.65929
## 6 -4.958520 3.99904  -6.52298  0.101405 4.99329 3.55949 -6.392520  -7.75628
##       PC16      PC17     PC18      PC19       PC20      PC21      PC22
## 1 11.70470 -10.20200 -4.88538  7.837980 -9.9147700 -0.479633  -6.28504
## 2  6.93279  -7.65994 -1.66107 -1.347090 -5.1788200 -0.305122 -14.25280
## 3 23.43990 -19.31760 -3.02697 -0.418472  2.3626600  0.958394   2.51428
## 4 14.93080  -8.47872 -3.09886  8.076280 -7.0295800  1.290610  -2.30400
## 5  9.19702 -11.17340 -3.94096 -2.087780 -0.0487011  2.746560   8.28849
## 6 -6.87185   3.23277  5.34675 -8.543800  1.2756200 -0.728523  -1.83813
##        PC23      PC24      PC25     PC26      PC27     PC28     PC29      PC30
## 1   3.20330  2.141370  0.268091  3.14516  -1.39835 -2.59199 0.165947  0.138815
## 2   9.57207  2.445640  1.454880  4.90913 -11.01050 -1.14730 1.468560 16.808900
## 3  -1.93554  0.278962 -1.822940  2.83876   4.27482  1.89196 2.125540 -7.931290
## 4  -0.90770 -1.265050  1.081640 -4.95508   6.84403 -3.57526 3.163940 -3.457080
## 5 -12.78510 -0.956789 -2.225350 -5.53712   2.69595 -3.06201 2.974850 -0.710658
## 6   1.63721  3.185860  3.173470  8.87940  -3.41808  4.38339 0.370649  1.754760
##        PC31      PC32      PC33      PC34      PC35       PC36       PC37
## 1  -2.49102 -15.19890  5.559990   7.70411   6.96136  -0.454729  2.3142500
## 2  -1.50218 -13.66460  8.896240  22.43480  12.56900  -1.421720  0.0656101
## 3  -8.84277  12.42150  0.588983 -25.72340 -12.06360  -5.559240  5.0160800
## 4   4.34493   1.24909 -1.646820 -14.65900  -1.61074   5.818420 18.9872000
## 5 -12.27880   9.48178 -6.799240  -2.05809  -3.65177  -2.650200  4.1611900
## 6  -4.45135   6.57529  5.896930  -4.39076  -1.92407 -20.792100 -8.3009400
##         PC38     PC39     PC40      PC41      PC42      PC43      PC44
## 1 -6.5797300 -6.63206 -5.77841 -2.764440   4.63476  9.976810  -3.63684
## 2  0.0569845 -9.14024 11.38230 -4.742730   4.03482  0.127265  11.23750
## 3  6.1940000  9.08458 -1.00197  1.382480  -9.60538 -3.977130 -12.59740
## 4 11.5738000 -8.43252  2.79942 13.646200 -11.91980  1.882400   3.78714
## 5 -5.2823200  9.87206 -4.78070 -0.333928  -1.30264 -1.728110 -18.69690
## 6  2.2247800  6.09109 10.58240 -8.098160   3.31380 -6.138280  -8.47033
##        PC45     PC46     PC47       PC48      PC49      PC50      PC51
## 1   1.24104 -1.79502  2.02788   0.202494  2.052020  0.583413 -17.27530
## 2  -2.77518 -4.82345  2.02721   6.254440  0.711571  1.389390  11.27720
## 3   3.60260  2.41759  2.71815  -6.492660 -1.199350  0.137072   3.73542
## 4  28.90900  3.53925 12.07280  12.783400  1.958420 -2.262660  15.48320
## 5  -3.16160  1.49227 -1.70442 -12.495000 -4.212900  6.230010 -17.26090
## 6 -10.56160  6.20400 -5.53827  -2.131910 -3.502590  4.657760  15.36510
##        PC52      PC53       PC54      PC55      PC56     PC57     PC58
## 1  4.183290 -10.11660  -1.852700 -5.927280 -4.842320  1.14485 -6.60310
## 2 -0.434797   0.56894  -2.396640  3.629040  1.150830 -7.23607  2.01905
## 3  3.762090   8.53289  -5.933400  2.753020 -2.475180  4.68986 -4.34825
## 4  7.964550  -2.18586 -13.702500 -0.908266  4.209110 -1.89241  7.83188
## 5 -6.598620   9.26582  -0.395142  4.532910 -0.782116 16.06220 -8.84062
## 6  1.858520  18.80760   4.187990  9.145780 -8.813420 11.45840  2.93130
##        PC59      PC60     PC61      PC62      PC63       PC64       PC65
## 1  1.428660 -0.224658  4.50809 -12.89220  6.966180 -3.4251400 -2.5965000
## 2  5.127400  7.047190  6.23085  10.22110  4.210590 -1.8488900  3.4306000
## 3 -3.699910  6.228740 -0.79353  -2.49357 -5.317330  0.0670438 -7.9249600
## 4 -0.946048 -8.536180 -1.21879  -9.62624 -8.943100 -5.6570100 -2.9408100
## 5 -3.218550 11.911700 -1.49187   2.03924  5.121750  4.2073500 -0.9615950
## 6 -8.914410 21.347100  1.64316   6.79338 -0.613567  0.7114540 -0.0850202
##        PC66       PC67       PC68      PC69     PC70      PC71      PC72
## 1 -1.394440  2.5350300  -0.984569  -2.71401  3.76750 -2.391040  0.528144
## 2 -0.249666 -2.2332100  -0.951251   8.16241  7.01649  8.667030 -1.819340
## 3  1.730570 -1.2261300 -14.775400 -11.76170 -4.56802  5.436410 -2.579790
## 4 -4.234570  0.0494313   3.120380 -15.91000  1.10756 -0.720438 -5.229170
## 5 -5.575250 -6.3473100 -12.894600   3.96418  7.22063 10.553000  6.878650
## 6 -2.059380  6.0829700 -17.714300  -3.29783  5.48122  6.393830  7.033050
##         PC73      PC74      PC75     PC76     PC77      PC78      PC79
## 1   9.170830  1.132140   5.41162 -1.01724  5.23653 -5.587490 -2.728420
## 2 -10.695100 -4.196060  -1.74054  1.03567 -5.94560 -2.838010 -6.004180
## 3   4.919590 -2.675190 -10.59510 -4.50283  1.95748 10.682800  7.429000
## 4   7.550350 -4.879980  -4.83926 -6.04896  4.72036 -1.833050 -1.261300
## 5  -0.259543 -1.715190  -3.29362 -4.68912 -3.67337  0.723783  0.798548
## 6  -3.543140  0.728822  -9.60782 -1.91837  5.43289 11.810700  3.962350
##        PC80     PC81      PC82       PC83       PC84      PC85     PC86
## 1   7.51738  3.02058  2.110330  11.872000   0.177072  5.318140 -1.21857
## 2  -4.00588 -4.13504  2.248930 -12.196000  -7.105820  1.513840 -1.19066
## 3 -11.21860 -7.06428 -6.747470   3.836470 -11.728100 -0.962982 -1.17061
## 4   3.05410  3.08371 -6.788600   4.166460   2.937640 -4.680980  2.79978
## 5  -7.89223 -6.13900 -0.678201  -2.656480  -8.182620  1.449200 -2.73650
## 6  -9.60430 -3.84158  0.466986   0.752945  -3.672560  1.632090 -2.20957
##         PC87      PC88    PC89     PC90      PC91      PC92     PC93     PC94
## 1  0.0157472 -4.909870 3.99136 -2.85633  9.271030 -0.923105 -6.57579 -1.49670
## 2  3.5776300 -0.999569 2.30794  1.76383 -1.017980  0.807255  6.24050 -9.88014
## 3 -5.6129500 -3.190530 1.75275 -2.39217 -1.213020 -6.918610 -4.30439  9.63454
## 4 -6.8574200 -4.303170 2.55183  1.03767 -9.147880 -9.851680 -9.94547  5.88331
## 5  2.8125600 -5.057380 1.35060 -6.82979 -0.999461 -2.151130  3.71211  3.76508
## 6 10.2339000 -5.916110 8.74460  8.40468 -0.140939 -2.441770  6.07298  1.14909
##        PC95      PC96     PC97      PC98      PC99    PC100     PC101    PC102
## 1 -1.705970 -4.774080  3.89459 10.406200 -8.220620  2.35138 -5.480480  5.26092
## 2 -6.884480 -1.700440 -2.71008  2.630500  9.395200 -2.98169  6.741940 -1.67474
## 3 -2.656010 -0.190924 -1.81154  0.625412 -0.178168  3.36716 -4.078920  2.52581
## 4  2.205440  6.076500  3.00509 -5.880210  1.282360 -3.78624  0.641194 -3.76860
## 5 -5.484760 -5.676200 -3.67940 11.792000 -0.227397 -2.07177 -3.399660  5.28666
## 6 -0.873207  3.135320  2.43684 12.161800 -6.445410 -2.11264  1.008960  3.29930
##       PC103     PC104    PC105     PC106    PC107      PC108    PC109    PC110
## 1 -3.023300  3.813990  7.93639  0.902842  7.41100 -0.2004300  3.67039 -4.85523
## 2  8.847310 -5.621070 -2.89782  6.028220 -8.71039 -4.3522200 -4.48840 -2.70624
## 3 -5.123530  5.568530 -1.57448 -7.204430  1.60002 -3.8318300  5.63741  2.17192
## 4  0.892564  3.807400 -8.26336 -3.123740  5.00888  4.2662400  5.76049 -3.50233
## 5 -0.746933  3.118180 -3.21939 -5.926790 -3.34355  2.6331700 -3.60086 -1.42533
## 6  3.570980  0.436257  5.13895 -5.720270  1.40391  0.0467343 -4.64397  2.15655
##       PC111     PC112     PC113     PC114     PC115     PC116     PC117
## 1  2.522410 -11.42840 -2.584290 -2.659490 -5.204680 -8.146140 -11.65680
## 2 -2.035720   7.92575 -2.149800 -4.723350  1.163330  0.377635   6.62557
## 3 -3.940020   8.59267  3.408660  0.321313 -2.424040  6.045010   1.13054
## 4  1.557970   1.61051 -0.993359  1.721980  3.255830  3.871180  -6.47478
## 5 -0.166056   1.25854  2.024940 -4.074460  1.265390  2.840870   4.38115
## 6  5.345260   7.13038  6.443210 -2.166000 -0.616138  0.575602  -5.05057
##       PC118      PC119     PC120     PC121    PC122     PC123     PC124
## 1 -2.444820 -0.0876937 -2.534290  3.282190  3.54598 -3.033000  0.446108
## 2  4.743430 -1.6208700 -0.420141 -3.080520 -2.12107 -2.867780 -1.178180
## 3  0.293379  9.8130200  5.764840  4.349400 -2.43091  1.888440  3.065260
## 4 -4.449650  1.9980900 -3.695730  1.491300 -1.01010 -0.254435  9.304650
## 5  1.693280  2.4008400  4.028670  0.599541 -2.61832 -2.957640 -1.233740
## 6  4.365700  3.6950100 -2.044930  2.396000  1.55483  2.945390 -3.625050
##      PC125     PC126     PC127     PC128    PC129     PC130     PC131    PC132
## 1 -3.34879 -0.838937  3.108210 -2.070410 -4.60976 -2.649300 -0.799363 -5.62859
## 2 -2.28663  6.339860 -4.635130  0.670587  1.13643  3.782980  0.656040  5.42355
## 3 -1.32975 -0.110951 -5.971700 -1.143310 -3.48084 -0.134847  1.817120 -6.33430
## 4  7.30425  9.904460 -0.946319  1.385330  6.91505 -3.828870 -1.561360  3.72315
## 5 -5.81024 -3.087670 -3.632620  1.676840  4.64251 -4.749220  0.369647 -1.95511
## 6  6.59379  2.627040  8.158240  2.199520  1.16396 -0.636840 -4.195020  3.73059
##      PC133    PC134     PC135     PC136      PC137     PC138     PC139
## 1 -3.98745 -5.58850 -2.916700 -4.850870 -5.0250300  2.713950  1.410660
## 2  1.14994 -1.35394 -3.837110  1.299120  1.4566400 -4.204280 -3.930120
## 3 -5.72722  2.47950  2.389970 -2.076530  2.6929300 -1.373520 -0.113003
## 4  6.01717  2.30139  4.156230  0.535404  1.2864400 -5.960520 -0.102168
## 5  6.94073 -1.74643 -3.264780 -7.717880 -4.0906700  0.633978  0.627256
## 6  2.99438 -5.72905 -0.031382  3.845800 -0.0921848 -7.688700  0.749744
##       PC140     PC141     PC142     PC143    PC144     PC145     PC146
## 1  8.986360  2.970330  5.738310 -0.396681 -3.24753  3.486680  1.639590
## 2 -4.971700 -8.798420 -1.925670 -0.792272 -1.48837 -4.585590 -2.997830
## 3 -0.237661  0.160804 -0.968494  2.420390  4.50809  3.460580  1.402210
## 4 -6.319200 -0.945139  5.790410  3.402740  5.93614  1.375240  0.144302
## 5  4.835890  2.058880 -5.702570 -3.560350  3.44048  2.463880  3.648460
## 6  2.766190 -0.248406 -1.618830  0.625417 -2.02644  0.687294 -1.077740
##       PC147    PC148     PC149     PC150     PC151    PC152     PC153     PC154
## 1  9.166460  9.42286  3.453520  6.036000  2.290240 -6.30507  8.949280 -2.819570
## 2  2.255950  2.42000  2.299810 -0.969596 -2.713220  6.24072  3.331990  8.921950
## 3 -5.661140 -8.68346  3.446820  5.278620 -0.748538 -3.56643 -5.487790 -4.618480
## 4 -4.091830  3.99384 -3.132580  1.630490 13.329000  5.85011  8.638840 -0.333806
## 5  1.918840 -0.40300 -3.815660  5.580700 -2.167700  3.27036 -1.819410  3.136120
## 6 -0.318047  5.18932  0.714816  5.114930  4.762080 -6.80116 -0.916525  8.191540
##       PC155      PC156     PC157      PC158     PC159     PC160     PC161
## 1  4.331610 -12.618600 -7.389210 -6.0977800  0.493374  2.483150 -0.374770
## 2 -1.324990   4.929360 -0.236756  7.6325300  0.111205 -4.727440 -4.529530
## 3 -0.827121   2.600120  2.125740  2.8611100 -3.454370 -4.182660  0.476025
## 4  0.921155   0.225701 -3.727740  0.3532320  7.329480  6.054260  8.548000
## 5 -3.402070  -2.504550  2.723930  0.0601978 -1.111300  0.767375  0.506037
## 6 -1.525330  -9.115360  0.386766 -4.8976300  0.153708  5.832630  3.308670
##       PC162    PC163    PC164     PC165     PC166      PC167    PC168    PC169
## 1  3.371260  3.05525 -1.77405  4.508040 -8.736710   9.760500 -5.64812 -1.89308
## 2 -1.112350 -1.01702 -2.92876 -1.105570  0.330393  -2.064830  5.83701  2.77659
## 3  5.770140  3.95876  3.66836  0.675254  5.562040   5.450400 -7.89879 -6.41106
## 4 -3.113610 -5.07163  5.18427 -4.616600 -3.064280 -12.380500 -2.63779  3.02937
## 5 -0.368924  2.05014 -3.53167  1.612200 -4.567550   4.571290  0.67456  9.37038
## 6 -8.006940  4.13406 -5.91255  3.384550 -3.937380  -0.270813  4.85521 -1.02440
##       PC170    PC171    PC172    PC173     PC174      PC175     PC176
## 1  4.724860  2.80760  3.66319 -1.81166 -1.667590 -9.1643200  9.210010
## 2  0.671167  5.20303  1.64979 -3.81205 -0.545418 -0.1864850 10.373900
## 3 -1.125440 -2.49469  0.19660  6.17104  3.560690 -0.0123705  4.581950
## 4  4.574220  1.55755  1.77774 -1.86139 -3.834350 -4.9196000 -5.406110
## 5 -1.344380 -4.37764  1.53241  7.60831  2.868630  4.5294100 -3.267020
## 6 -1.653570  1.81293 -5.38385  2.12732 -4.363780 -1.8879800 -0.821741
##        PC177     PC178   PC179      PC180      PC181     PC182    PC183
## 1  -0.808282 -2.600050 3.49627 -1.4633200 -0.0162058 -5.668270 -5.90813
## 2 -10.472800  7.652740 4.45539  3.7145200 -8.8267600 10.582600  2.09555
## 3   5.429350  0.900173 5.78224  5.9533500 -0.1632180 -5.070990  3.13627
## 4  -1.039870  7.838280 0.95028 -6.7603700 -2.4469700  0.158066 14.98770
## 5   2.239460  0.722735 1.32263  3.8853400  1.9179400  3.342270  5.12427
## 6   2.526650  0.142561 2.56224  0.0895991 -1.9851200 -5.257390  1.23091
##       PC184    PC185     PC186     PC187     PC188      PC189       PC190
## 1 -1.088520 -4.75782 -6.192400 -7.359260 -9.826790  -0.388289   0.8833800
## 2  0.530547  2.07018 -0.294755  0.491013 -0.654046   7.179830  -1.3920400
## 3  2.324290 -2.10162 -4.341250 -2.911020 -2.507120 -11.000900  -0.0583295
## 4  0.637214  8.80171  0.781150 10.688300  1.175290   2.059360 -12.2454000
## 5 -6.039930  2.77807  0.322562  6.442530  3.073060  -0.794452  -3.7219700
## 6  1.404400 -2.80189  1.506750 -2.786420 -6.497590  -7.722230  -2.0715100
##       PC191     PC192     PC193    PC194     PC195      PC196    PC197    PC198
## 1 -2.762770 -2.959770  1.660910 -2.79958  11.27050 -10.614700  1.33377 16.66170
## 2 -3.587680  0.683926  2.611860 -4.48210 -10.00910  -0.122655 -1.49524 -6.43537
## 3 -1.382200 -3.330270  2.211450 -2.12218  -3.24565   0.817553 -1.02774 13.17060
## 4 -1.659820 13.497100 -4.905160 -6.08711   5.08899  10.839900  3.52518 -4.48595
## 5  1.081110 -9.507780  2.682270  1.91901   6.77421   1.864110 -6.04193 -8.03805
## 6 -0.822098 13.106200 -0.921539 -8.00720   6.55654   5.630560  5.44407 -1.57349
##      PC199      PC200     PC201     PC202    PC203     PC204     PC205    PC206
## 1  3.07933  4.9476800  5.079740   2.75030 -2.62805 -3.841330   2.16287  1.99476
## 2 -1.59744 11.0072000 -4.562800 -12.24880 -3.11288  4.940580 -14.96280  6.11133
## 3 -5.65437  0.0833825  0.930584   4.12751 -1.13538 -0.860866   1.83321  1.75967
## 4  9.57010 -0.8194510 -2.497450  -2.22290  5.76984  0.033245  -1.44901 10.06420
## 5 -4.87455  0.8831040 -2.155150  -7.54596 -4.93448 -1.012900  -6.68173  4.00790
## 6  7.03820  0.1863420 -1.690480   6.09464  5.80551 -2.320740   5.77096  6.02818
##        PC207      PC208    PC209     PC210     PC211      PC212    PC213
## 1  -4.346700  -3.289450  6.25447   9.59359  11.39850 -10.664800  5.34098
## 2 -17.936600  -5.766270 17.09600  -8.23135 -12.39320 -14.725900  6.39669
## 3   5.970320  16.712200 -4.46878 -11.89830 -18.75880   0.183248 -3.94048
## 4   0.984328 -10.354400  2.88181   4.32485   9.14880  12.801800  1.43463
## 5  -1.279120 -21.589400  1.26366   8.92264   8.59670  -3.124050 -5.09427
## 6   2.254300   0.491288  8.94043   6.55083   8.92357  13.538100 10.99700
##       PC214     PC215      PC216      PC217    PC218    PC219     PC220
## 1   2.03335  -4.89820  17.156000   0.569129 -8.54037 -4.27732  -7.58659
## 2   8.98167   1.12067   8.740180  -6.737930  9.81672 11.03830  11.39990
## 3 -13.35700 -10.16990  10.742800   7.821910  2.88008  8.07801  10.57460
## 4   3.72525  -2.91145   0.975213  -0.411577 -4.31412  6.24749   9.76544
## 5   8.99626   1.33743 -15.309300  -6.723800  2.39594 -5.56380  -7.92364
## 6   3.75290   8.41336   0.335052 -13.382600 -1.72089 -1.42374 -11.05330
##          PC221     PC222    PC223      PC224    PC225    PC226      PC227
## 1  -9.71545000 -15.97020  1.57895 -10.059800 -5.54120 -4.42842  -7.280220
## 2 -10.26930000   4.02669 -6.23260   3.703880 -1.71717  2.41911  -0.722488
## 3  -5.67233000   4.59724 -7.69099  -0.731153 -6.17550  8.91809 -12.116000
## 4   0.84976400   6.04488  4.11756   5.144400 -0.51155  1.21722   1.729120
## 5  -0.00341819  -2.43077  5.94141   6.384400  2.35078  9.68112 -15.516400
## 6   2.63042000   2.41889  6.96757  -7.781280  3.84133 -6.94825  15.439900
##      PC228       PC229     PC230    PC231    PC232     PC233    PC234     PC235
## 1  1.56653   1.9128200  6.663760  1.80641  3.21324 -7.058290 13.82720  1.490730
## 2  5.01804   1.4214500  4.968850  0.39041  4.73389 -0.209115  5.45948 -8.627970
## 3  7.24334   8.8614200  5.350230 -4.39558 10.22460 -9.296230  3.28080 -4.472500
## 4  6.97765  -0.9838880  6.531200  2.71362  2.40154  2.979480 -5.31963  0.778473
## 5 11.28150   0.0489831 -0.486633 -4.51210 -7.80903 12.007100  5.41963  3.159060
## 6 -7.67813 -11.2558000 -5.792450 -1.12982 -3.95942 -1.022840 -6.12596 -2.303430
##      PC236      PC237    PC238     PC239      PC240      PC241    PC242
## 1 -2.45632  5.9709600 -1.92098  -1.27551   7.802630   2.615760 -1.23867
## 2  0.55105 -0.0382294  5.18558   4.81965   2.566010   0.935648  3.58327
## 3  4.30844  3.5837200  6.88010  10.31570  -0.215351 -10.027800  8.68316
## 4 -2.55565 -0.7882580 -2.53036 -12.35090  -1.156690  12.280900 -4.18891
## 5 -6.91222  3.5995900  3.16979  -3.76331 -15.541100  14.235900 -1.60570
## 6  2.47666 -8.3607100 -4.51232   3.00459  15.746700 -13.219700 -2.08820
##      PC243    PC244     PC245      PC246     PC247     PC248    PC249     PC250
## 1 -2.74303  8.72729  3.841800 -13.477500 -6.394540  0.939012 -3.35675  0.301498
## 2 -1.18148  4.51859  5.689910  -0.543535  2.295940  4.706180  6.13716 -0.511929
## 3 -8.33822  5.63916  0.444548   2.968990  3.570350  4.390560  2.62112 -0.794626
## 4  4.97187  4.88890  4.143980  -8.726400 -1.017430 -4.743460  5.66040  2.951600
## 5 13.23640 -4.54562 -2.767880  12.071600 -0.395225 -2.381310 -7.41396 -1.400670
## 6 -6.61671 -1.43764  1.184490   0.489956 -0.692523 -2.291920  6.60338  1.607200
##       PC251      PC252      PC253     PC254     PC255    PC256     PC257
## 1 -0.419299 -0.0641331 10.0366000 -3.687180  0.986868  2.00737  0.846952
## 2  2.355360  4.1964400  0.1148790 -2.663940 -3.998730  1.22131 -4.127310
## 3  0.488164  4.6687700 -2.6827800  2.383900 -2.245530  2.21993 -2.576030
## 4  4.683660 -1.3397700  2.9772200 -3.330460  0.547032  1.98340  0.068591
## 5 -3.516040 -8.2333800  0.0749863  0.315826  1.616660  2.00374  2.052890
## 6  2.239730  2.1253100 -6.2841100  0.981943  2.011350 -4.47946 -3.964310
##        PC258     PC259     PC260     PC261     PC262      PC263     PC264
## 1 -0.7440720  0.288347 -1.179400  2.080990  1.604400  0.0755411  0.478741
## 2  0.0734875 -0.388623 -0.895096 -3.151140  1.084370  1.5254900  0.873749
## 3 -0.1464840 -4.073660  1.334820 -0.116169 -1.072870  1.4481000 -1.129210
## 4  0.0885669  1.687960 -1.387930 -1.708650 -0.411361 -1.1811700  0.386918
## 5  2.5220600 -4.583370  2.257550 -3.054520  3.266260 -0.7744740 -0.752204
## 6  0.6285420  1.638860 -0.995377 -0.716599 -2.626100  0.9657840  0.594791
##       PC265     PC266     PC267     PC268      PC269     PC270      PC271
## 1  0.421656 -0.628479 -1.629130 -1.499560  1.8114200 -2.368890  0.5769160
## 2  1.040240  0.360824  2.533680  2.011430 -0.3900610  0.689523 -1.8109500
## 3 -1.806820 -0.229284  3.211490  1.677790 -0.0630426  0.751667 -0.9790850
## 4 -0.427434  0.996801 -1.440390 -2.051640  0.2172810  0.203962  1.0798100
## 5 -1.667390  2.070440 -0.469447 -1.935430  2.0364700  0.719146 -0.2692470
## 6  1.288230 -0.504035  1.051390  0.928381 -2.0237600 -0.998783 -0.0467495
##       PC272      PC273      PC274      PC275     PC276      PC277      PC278
## 1  1.109900 -0.6796440  0.4229600 -1.7638400  2.446870 -0.9651020 -1.2354000
## 2 -0.272658  0.1830280 -0.6587680 -1.0463900 -1.118450  0.7515890 -2.8140300
## 3 -0.678672 -0.0811709 -0.4936660 -0.4592600 -0.502171 -0.3357670 -0.4385010
## 4  2.401550 -1.5051200  0.6993520 -0.6823450 -0.285309 -0.5145040 -0.0204774
## 5  1.465060 -0.6795800  0.2306010  0.4262590  1.230140  0.0184329 -0.4976150
## 6 -1.289780  0.2974330  0.0672784 -0.0663638  0.914317 -0.2827370  0.0993676
##       PC279       PC280     PC281     PC282       PC283     PC284      PC285
## 1  1.032090  1.47404000 -1.528150 -4.037510  0.32388100 -0.317983 -1.1915300
## 2  0.818536  0.77091300 -0.543492 -4.133750  0.00422665  0.422983  0.7635770
## 3 -1.210300  0.00167828  0.785943 -0.189621 -0.25221300 -0.277350  0.7455640
## 4  0.444233  1.35126000  0.380841  2.377150  0.09532000 -0.619482 -0.0575488
## 5 -0.377898 -1.08836000 -0.017085  1.589780  0.96122500  0.256280  0.8698030
## 6 -0.630993  1.27841000  1.361400  1.974320 -0.19138000  0.237632 -0.3190350
##       PC286      PC287      PC288      PC289      PC290        PC291 PC292
## 1  0.300507 -0.1658100  0.4254090  0.4903590 -0.1762250 -1.95817e-01     0
## 2 -0.288328  0.3950560  0.0283805  0.0701378  0.2312970  1.44166e-01     0
## 3  0.127325 -0.0381009  0.1522850  0.1031790  0.1557410  1.08869e-01     0
## 4 -0.246705  0.1027270  0.1072570 -0.0732240  0.3829160  4.32772e-05     0
## 5  0.496852 -0.1101960 -0.3285820 -0.7412270  0.0165348  2.78505e-02     0
## 6 -0.956114 -0.1354700 -0.2202060  0.1508820  0.1766310  2.62704e-02     0
##   Individual  Pop_City         Location Latitude Longitude Continent Year
## 1        283 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018
## 2        279 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018
## 3        282 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018
## 4        286 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018
## 5        287 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018
## 6        284 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018
##            Region   Subregion order order2 orderold
## 1 Southern Europe West Europe    16      8        8
## 2 Southern Europe West Europe    16      8        8
## 3 Southern Europe West Europe    16      8        8
## 4 Southern Europe West Europe    16      8        8
## 5 Southern Europe West Europe    16      8        8
## 6 Southern Europe West Europe    16      8        8

6.6.2 Create PCA plots for Iberian peninsual + turkey + native range

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC1 & PC3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

For some reason I left out 2 of the Spanish pops in the above; use the dataset below (which also includes the USA) instead

6.7 PCA for Iberian Peninsula + Turkey + US (MAF 1%, R2<0.01 snp set)

Turkey + Iberia + US

echo "BER
PAL
POP
POL
SPB
SPS
SPC
BAR
SPM
TUA
TUH
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_turkey_iberia_US.txt
cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_turkey_iberia_US.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/native_turkey_iberia_US2 \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/native_turkey_iberia_US2.log

100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.969253. 22537 variants and 329 people pass filters and QC.

6.7.1 Import the data for R2<0.01 subset for native_iberia_turkey_US2

genotype <- here(
   "euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 338
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 338
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BAR BEN BER CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM 
##  12  12  12  12  12   2  12   4   7  12  11   4   2   6  12  11   6   4   4   9 
## MAT OKI PAL POL POP QNC SON SPB SPC SPM SPS SSK SUF SUU TAI TUA TUH UTS YUN 
##  12  12  11   2  12  11   3   8   6   5   8  12   6   6   7   9  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   329
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   329
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
## 
## 
##  - number of detected individuals:   329
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.lfmm"

PCA for MAF 1% r2<0.01 snp set of native_turkey_iberia2

nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          329
##         -L (number of loci)                 22537
##         -K (number of principal components) 329
##         -x (genotype file)                  /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.lfmm
##         -a (eigenvalue file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.eigenvalues
##         -e (eigenvector file)               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.eigenvectors
##         -d (standard deviation file)        /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.sdev
##         -p (projection file)                /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/ 
## pca result directory:            native_turkey_iberia_US2.pca/ 
## input file:                      native_turkey_iberia_US2.lfmm 
## eigenvalue file:                 native_turkey_iberia_US2.eigenvalues 
## eigenvector file:                native_turkey_iberia_US2.eigenvectors 
## standard deviation file:         native_turkey_iberia_US2.sdev 
## projection file:                 native_turkey_iberia_US2.projections 
## pcaProject file:                   native_turkey_iberia_US2.pcaProject 
## number of individuals:           329 
## number of loci:                  22537 
## number of principal components:  329 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          329
##         -i (input file)                     /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.eigenvalues
##         -o (output file)                    /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey_US.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey_US.rds"))

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey_US.rds"))
head(sampling_loc)
##     Pop_City             Location Latitude  Longitude Continent Abbreviation
## 1 Berlin, NJ     Berlin (NJ), USA 39.79081 -74.929100  Americas          BER
## 2 Palm Beach Palm Beach (FL), USA 26.70560 -80.036400  Americas          PAL
## 3   Penafiel   Penafiel, Portugal 41.18555  -8.329371    Europe          POP
## 4      Loule      Loule, Portugal 37.09084  -8.092465    Europe          POL
## 5    Badajoz       Badajoz, Spain 38.86622  -6.974194    Europe          SPB
## 6  San Roque     San Roque, Spain 36.17042  -5.371530    Europe          SPS
##   Year          Region   Subregion order order2 orderold
## 1 2018   North America                 1     NA       75
## 2 2018   North America                 3     NA       77
## 3 2017 Southern Europe West Europe    11      3        3
## 4 2017 Southern Europe West Europe    12      4        4
## 5 2018 Southern Europe West Europe    13      5        5
## 6 2017 Southern Europe West Europe    14      6        6

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 39 Levels: BAR BEN BER CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG ... YUN
#Check how many sampling localities
length(unique(pc.coord$Population))
## [1] 39

Check the regions

unique(sampling_loc$Region)
## [1] "North America"   "Southern Europe" "Eastern Europe"  "East Asia"      
## [5] "South Asia"      "Southeast Asia"

Merge

merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
##   Population     PC1      PC2     PC3      PC4      PC5      PC6      PC7
## 1        BAR 25.7129 -2.62185 28.9986 -5.51174 -28.5950 -4.85375  8.80835
## 2        BAR 25.9719 -4.37975 30.2435 -6.29308 -28.0503 -9.28326 20.98460
## 3        BAR 22.6584 -5.98900 31.2084 -6.17300 -30.1854 -8.67753 20.77940
## 4        BAR 23.3355 -3.95914 34.0338 -5.23732 -32.6703 -8.54093 20.55520
## 5        BAR 24.6073 -5.65383 25.9796 -6.70372 -24.6932 -6.85840 13.02770
## 6        BAR 26.0343 -4.48019 33.1304 -6.75906 -32.0280 -8.35747 16.63600
##         PC8       PC9     PC10     PC11     PC12     PC13     PC14     PC15
## 1  1.870870  1.734370 -4.91634 -5.53705 -3.56304 -2.94512 4.457260 -4.79310
## 2  1.474940  0.220050 -7.61993 -2.89329 -8.41748 -5.24904 0.814279 -5.94347
## 3 -1.932840 -1.092970 -4.85270 -8.35119 -7.82170 -5.77658 0.190452 -4.28722
## 4 -0.531471 -1.355440 -7.56444 -9.04959 -8.02850 -9.13727 0.506113 -4.30162
## 5  2.721540  1.018850 -2.60428 -4.39771 -7.28905 -6.78252 1.763620 -3.00040
## 6 -2.175300  0.747143 -6.44493 -4.93338 -6.69604 -3.22217 4.346380 -8.02626
##        PC16    PC17      PC18     PC19      PC20       PC21     PC22    PC23
## 1  1.590870 5.39503  2.290690  9.44932 19.933400 -15.758100 11.18300 1.14165
## 2 -0.368734 4.33769 -4.907070  2.69515 10.715300  -2.582720 16.10230 5.94577
## 3  5.501230 6.05556 -3.303920 -3.16960  0.672816   2.631570  2.17795 4.66959
## 4  2.402530 7.61034 -0.886132  1.93028  6.566990   0.742576 10.85790 8.38076
## 5  0.393075 4.37686 -1.464760  2.51858 13.918800  -2.127500 12.61620 2.04683
## 6  5.193690 5.56186  1.268110  4.25996  7.034090  -8.806690  5.58722 5.85928
##        PC24      PC25       PC26        PC27      PC28     PC29      PC30
## 1  0.333029 -0.122214 -0.3611860 -0.59082200  0.457773  1.87716  0.932496
## 2  1.410770 -4.356960 -1.5981200 -0.00533906 -8.634270  1.59730  2.394030
## 3 -0.108263 -0.805248  0.0503982  2.43193000  5.947910  4.08274 -2.081860
## 4 -5.096590  6.249210 -7.5627300 -5.53342000 -8.613630 -5.50024  1.394430
## 5  3.214470 -6.152820 -1.7519700  0.66419400 -3.034210  2.56470 -3.608680
## 6  2.108070 -1.624790 -0.3402630  4.89707000 11.758800  5.48238  0.971937
##         PC31      PC32      PC33     PC34     PC35      PC36     PC37
## 1  0.6491640 -3.785210 -0.843485 -2.46992 10.71850 -1.998580  3.27762
## 2 -0.0736669 -0.482962  0.522689  4.43410  1.89056  7.354840 -1.98055
## 3  2.6615400  1.039170  0.727252 -3.15391  2.01250 -7.941030  3.76717
## 4 -0.6493140 -6.555910  5.050470  5.58842 -8.85389 -0.556657 -2.59914
## 5  2.1252000  4.771000 -3.755900 -1.84615  1.88672 -1.541800 -8.72959
## 6 -2.3446000  4.829720  0.339756 -1.15407  6.99774  0.138146  3.05512
##         PC38      PC39      PC40       PC41      PC42      PC43      PC44
## 1 -13.296200  -8.42828 -3.509060 -23.379600  -6.42773  1.246000  4.111360
## 2   8.781640   9.11525 -1.841470  10.115300  12.48200 -0.592523 -0.672527
## 3   0.686314  -7.56950  1.042220  -3.505750  -1.65533 -2.831840 -1.518690
## 4  -3.414240  17.77690 10.326000  19.538900  13.86880 -4.557990 -8.983550
## 5  -9.756850 -10.41210 -9.030700  -8.702520   3.47650  3.723150 -1.278030
## 6  -5.096560  -1.93924  0.693932  -0.733361 -12.07670  4.917280 -0.077805
##        PC45     PC46      PC47      PC48         PC49     PC50     PC51
## 1  -8.22693  7.26459   2.81229 -13.69750   0.00529661  7.22674  7.18839
## 2   3.38634  5.00751  -3.71344   2.27498   5.70275000 -5.13276 -6.63773
## 3 -14.49830  3.85632 -12.99480 -11.99780  -2.84140000 -6.46671  6.38107
## 4   3.07865 -6.04796   1.83729  14.83490  -5.04357000 -5.73618 -8.20044
## 5   4.45817  5.27687  19.34660   1.15268 -11.74550000  8.01450  1.28139
## 6  -4.10900 13.32710  -9.49633 -11.68910  -3.05790000  2.61201  2.57471
##       PC52      PC53      PC54      PC55      PC56     PC57     PC58      PC59
## 1 -2.48475 -3.391080   9.13812 11.920900 -0.238951 -1.51282  1.87436  -1.01701
## 2  5.22769 -2.487550   5.47851  4.870490  0.584372 -2.86258 -6.79152  -5.49720
## 3 -1.42788 -0.025347  -4.05139  0.913042  1.998210 21.02590 -1.94426  -7.02490
## 4  2.82566 11.016500 -11.09660 -6.243970 -0.969577 -1.51034  1.29543   7.58641
## 5  9.87423 16.311000  25.72790 -4.130750 -3.567030 -8.54634 -2.06231  17.18530
## 6 -7.43502 -9.250040   3.42509 12.396700  0.165556  3.05928 -2.30151 -14.62470
##        PC60      PC61      PC62     PC63     PC64      PC65       PC66
## 1  -2.23455 -0.177461  -7.37615 1.940790 10.91640   2.69161   6.945990
## 2   6.31023 -5.034510  11.50390 0.596963 -7.65995   1.95384   5.083740
## 3  14.60620  9.074690 -20.43490 2.095490  6.29937 -17.01570 -14.686000
## 4  -2.72874  0.546962  -1.29349 2.048900 -1.10257   4.74395  -3.385140
## 5 -10.08030 -6.365580  -3.59783 1.705990  2.52326   4.03709   0.651713
## 6  13.69920  4.198100  -2.36435 0.707916  3.81585   2.18127  -1.560010
##        PC67     PC68     PC69      PC70       PC71      PC72     PC73
## 1  0.917349  7.35632 -1.57878  5.889800 -10.698400   2.62260  6.20659
## 2  0.197489  2.87269  2.20831 -5.609890   1.243510  -1.96598 -2.96545
## 3 10.067700 -2.97230 -5.42337 -0.881617  -0.155405  14.57200 -2.60278
## 4 -2.023540 -3.34201  5.79487 -3.921900   6.365240   2.93555  1.89292
## 5 -4.264750  4.21339 -7.45710  9.331820  -1.367420 -13.22440  1.68586
## 6 -0.949399  6.69744 -6.73716 -6.014000  -6.764330  19.51010  3.30330
##         PC74      PC75     PC76     PC77      PC78      PC79      PC80     PC81
## 1   2.393500 -1.228940  1.65910 -2.49965 -0.395638 -6.365730  -3.90690 -6.40180
## 2  -0.889967  9.492890  8.07974 -7.77725 -2.983300 -0.546322  -5.86025  6.90206
## 3 -11.783100 -5.797900 -9.11690  2.16389 -1.813100  3.329050  13.50310 -2.92843
## 4   2.405830 -5.448380 -1.06254  1.01008 -2.424320  2.709030  14.50870 -3.00442
## 5   4.348830  9.638210  1.26453 12.57100 -0.381574  0.754405 -12.03030  2.78153
## 6  -7.999040 -0.687616  2.21934 -2.31997 -6.937260 -2.012540   8.46498 -8.61532
##        PC82      PC83      PC84      PC85       PC86      PC87     PC88
## 1  7.634040  -8.58928  6.864730  5.214140 -13.132200 -6.822920 -1.89355
## 2  7.743140  -1.80619 -1.761400  4.625120  -0.432541 -2.978370 -1.18636
## 3 -0.914486   5.87550  0.891326 -6.076440   6.584080 12.120100  1.18209
## 4 -4.396260  -8.21684 -1.405600 -3.149420   1.306570 -0.226873  6.57654
## 5 -3.688690  -1.17806 -0.431416  0.654583  -9.213000 -9.637340 -6.24666
## 6  6.685710 -14.22560  3.122500  7.727820  -3.419080 -1.335180  4.31747
##        PC89      PC90      PC91      PC92      PC93     PC94      PC95
## 1 -0.243170 12.631100  9.771980   7.22482  0.453895 -4.69968 -8.237440
## 2 -1.721830 -5.142270  2.703170 -15.97690 -4.544550  8.85826  1.801130
## 3 -0.781144 -2.744640 -8.071470   3.65437  3.360340 -3.39123  1.189590
## 4  2.235670  0.522126 -1.432080   7.00188  7.283740 -7.05028  2.663720
## 5 -1.591390 -3.129380  6.857100   4.46007 -0.876335  1.50089 -3.151160
## 6  0.308486  3.797690 -0.361274   1.70548  3.427190 -2.16816  0.803193
##        PC96     PC97     PC98      PC99      PC100      PC101     PC102
## 1 -1.438810 -1.87967 -4.16469  0.455943 -2.1381200 -10.157700  2.930280
## 2 -0.211897  5.15472 -1.60486 -6.203590 -2.6882800   0.958768 -0.885385
## 3  3.623900 -1.79305 -1.07488  5.120680 -0.7640870  -2.019440 -1.978810
## 4 -2.889360  6.44299 -3.84847  4.838880 -0.0480383   0.572218 -4.688430
## 5  9.152500 -4.41583  6.59044  2.456890  2.3220800  -0.853881 -0.815616
## 6 -2.430890  6.96112 -3.40400 -0.208818 -0.7036810  -3.604280 -1.187880
##      PC103     PC104     PC105     PC106    PC107    PC108     PC109    PC110
## 1  1.23902 -5.120930 -4.368840  1.037800  7.20698  3.68224 -0.368935 -1.15635
## 2 -3.39272 -1.157220 -0.758977 -0.745568  3.71898 -6.88873  7.221980 -2.64895
## 3  1.98004  0.716473  5.637400 -3.504650 -3.92182  3.46150 -7.183810 -5.62683
## 4 -1.52862  5.666670 -5.294130  2.326670 -5.19702  1.87295  4.297490  7.95955
## 5  1.62872 -8.420870 -5.656670  3.814460 -2.19808 -2.60299  0.521129 -5.61614
## 6 -7.22169  2.379020 -1.308250 -0.870971  5.86702  4.99827  0.935283 -3.22324
##       PC111    PC112      PC113     PC114     PC115    PC116      PC117
## 1  0.600113  3.70751  -5.547490  2.866860 -1.676030 -2.86628 -2.4995400
## 2  1.268020  2.06241  -3.280630 -0.670925  0.352867  7.81723 -6.4582600
## 3 -5.493960  3.26006   2.601890 -6.114910  3.526690  3.56937  3.3057300
## 4 -8.377160 -7.17172  -0.978643 -4.673240 -2.612030 -3.42140  1.2679300
## 5 -0.379843  9.57801   2.224540  5.764800 -2.447190 -1.41866  0.0821863
## 6 -3.641380 -3.68478 -13.761700 -3.286960  2.376510 -2.51607 -1.2937700
##       PC118     PC119    PC120    PC121     PC122      PC123     PC124
## 1  1.152380   1.22148  1.35734 -4.75114 -0.869155 -1.2783300  0.106456
## 2 -0.930811 -13.56500 10.66720 -3.95575  4.973730 -0.0271546  2.242130
## 3 -1.494830   4.91867 -6.50309  1.23996  3.410710  2.5321400 -5.066040
## 4  3.660750   4.03208 -4.78504  6.70646 -0.650758 -1.9886800 -1.732770
## 5 -3.553300  -2.80008 -3.72177  7.63769  0.821607  1.1064100  4.676510
## 6  3.677810  -3.98069 -1.46809  2.81461 -5.016200 -0.7803590 -2.897490
##       PC125     PC126     PC127     PC128     PC129    PC130    PC131     PC132
## 1   3.14992 -6.801810 -3.449730 -2.921770  4.384100 -6.01140  8.71534 -8.418310
## 2   5.08365  2.089020  0.222601 -6.857800 -3.845480  4.69359  5.26967  3.309780
## 3  -1.46590  4.046790  4.163530 -1.177230  1.276930  2.56634 -9.04946 -1.268550
## 4 -11.20610 -1.634600 -1.271220  8.729580  1.645530 -4.47525 -7.73236  3.178270
## 5  10.61010  5.909370  4.285370  0.288425  4.797270  1.33226 10.08740  0.620095
## 6  -1.24259 -0.987467  1.959150 -1.316770  0.770285 -4.00533  2.21727 -4.599960
##       PC133      PC134    PC135    PC136    PC137    PC138     PC139    PC140
## 1  1.247160 -3.6746600  6.08511 -5.12278 -3.44129  0.68073  0.884904 -4.72218
## 2  0.550375  2.0801000 -6.33822  6.22251 -4.86637  4.13430 -1.732350  5.29714
## 3  1.377060 -0.0354954 -5.76098  4.52578  1.33248  2.71518 -0.636026 -2.01015
## 4 -4.919670 -0.8878070  3.74182  3.33335  5.36889 -2.79551  3.634640  1.38568
## 5  7.066210  4.3528600  2.34308 -3.76563 -7.62249 -8.11352  2.224500 -2.35684
## 6 -1.291370 -6.2836800  4.32724 -1.94272 -2.85946 -4.05331  3.258220 -2.45105
##       PC141     PC142    PC143      PC144      PC145     PC146     PC147
## 1 -1.444540  5.484490 -0.32021  -3.446660 -11.478100  1.359720  2.262930
## 2  1.657710  4.174880  1.08986 -10.475200  -0.206588  0.848232  0.693631
## 3 -0.854115  2.174070 -2.75772  -0.706346   4.781330  0.346067 -2.499990
## 4 -2.066620 -4.076950  1.22522  -0.198642   1.127730 -5.188380 -5.001110
## 5 -5.628000 -7.421560  6.22590   5.442630   0.855120 -2.511970 -1.218310
## 6  1.823440 -0.485569 -5.11459   1.353360   1.204220  5.280530 -2.129380
##        PC148     PC149    PC150     PC151     PC152    PC153     PC154    PC155
## 1 -1.5995600  2.930680  1.25866  1.313810  3.953900  3.37841 -1.226190 -4.06738
## 2  0.0533152 -1.080660  2.98743  2.907660 -1.649560  5.18786  4.043370  1.24385
## 3  1.1897600 -2.096530 -2.58254 -3.632070  2.155250 -5.74233 -1.430910  4.59022
## 4 -3.4337100 -1.676370  3.10077 -0.548271 -4.798290 -1.05579  1.475150  6.12158
## 5  1.5200700  0.242815 -1.22193 -5.419560  0.755099 -2.93214 -6.324730 -1.97275
## 6  0.4135370  0.836087  7.43499 -0.662060 -1.328750  3.40554  0.793226  1.84796
##        PC156     PC157    PC158     PC159    PC160    PC161     PC162     PC163
## 1  0.0200301 -0.198192 -4.09215   1.34319  5.45502 -5.16670  0.748815 -4.250780
## 2 -0.9412180  8.028920  4.53141  -2.14673 -5.54470 -3.44115 -0.306315  2.182890
## 3 -6.1530200 -6.446070  1.44059  -5.08515 -1.00983 -1.19545 -2.392580  1.128390
## 4  1.1947300 -2.754890  2.59877   5.17269 -4.48090  4.58743 -4.364330 -0.327329
## 5  6.7848300  1.190440 -5.43150   7.32264 -2.02349 -1.63856 -0.605595  3.293560
## 6  2.9913600  3.059010  3.59015 -11.93240 -3.18970 -1.92553 -0.481372  2.239820
##      PC164    PC165     PC166     PC167     PC168     PC169    PC170     PC171
## 1  1.33749  1.21474 -6.598020 -0.795236  4.978780  0.560395 -3.30080 -4.185800
## 2 -5.11573  5.23798  2.314560 -6.867580  0.198715 -0.439490  8.39280  2.711710
## 3 -6.41499 -1.35580 -1.843520  4.107610 -1.846500  0.476608  6.01534  1.541340
## 4  2.89053 -2.85711 -1.651910 -0.805614  2.109260 -4.529850 -4.21240 -2.149130
## 5  6.69327 -7.24007  6.646710 -2.481420 -4.517500 -1.050480 -6.23273  4.969200
## 6  5.73663  3.07591 -0.642563  0.570129  0.363035 -4.123080  0.85659  0.543589
##       PC172      PC173      PC174      PC175     PC176     PC177     PC178
## 1  0.717898   0.701147 -0.0744813   6.310380  0.673007 -6.672160 -0.547337
## 2 -0.238610   9.963270 -2.4448600 -16.645900 -5.058020  1.246670 -2.543140
## 3 -1.948520   1.676890  1.9756400  -0.651444 -0.388861 -4.898290 -5.976720
## 4  1.650750 -13.219700 -2.2461700   3.712520 -1.790180 -1.646410  3.921580
## 5  9.230910   3.536260  7.3502200  -3.973760 -4.863820 -0.490094  1.035400
## 6 -2.349120  -0.735585 -5.0606100   0.973047  1.675880  4.786290 -0.439128
##       PC179     PC180     PC181    PC182     PC183    PC184    PC185    PC186
## 1  0.374826 -1.902990 -1.998040 -2.83480 -4.639700 -1.54106 -3.21807  3.63484
## 2  2.348450 -0.913489  3.398340 -0.22483  2.326280  4.18375 -3.63032 -7.29759
## 3  3.458210  2.874280  4.809760  1.29689 -0.611268 -1.61441  8.20865  5.30831
## 4 -1.150270 -3.200230 -1.130810  1.11435 -2.256870 -0.78954 -0.42869  8.50227
## 5  2.611880  6.006020  0.230703  4.81012 -3.384240 -1.40677 -4.23615 -3.91429
## 6 -0.911831 -2.733000 -2.226200 -2.12319  0.676775  3.86474 -2.42279 -8.56611
##       PC187     PC188     PC189     PC190     PC191     PC192     PC193
## 1 -4.579180 -7.858830  0.335818  -4.92246  10.50470  2.906410  3.250850
## 2 -6.723570 -0.603431 -5.736950 -11.97710   2.50693 -3.615450 -1.908840
## 3  2.945630  1.479520  2.778580   1.53770   2.74395  4.408030 -4.887140
## 4  5.058180  1.477590  2.287680   2.16909 -15.67790  2.858210  1.051200
## 5  0.966594 -4.056130  1.024060   7.74491 -11.46850 -9.392760 -0.621817
## 6  1.316620  4.087070  8.082890  -3.17717   3.19539  0.125781 -4.479550
##       PC194     PC195     PC196     PC197     PC198    PC199    PC200     PC201
## 1 -2.803420  0.940661  4.550060 -6.312980 -0.871334  2.97657  2.48236  0.176071
## 2  3.934200  3.359870  4.392700 -1.148540 -3.500150  8.54854 -5.29140 -0.345064
## 3 -0.479624  2.438460 -4.651430  4.272160  1.218910 -7.26198 -2.72198 -5.283790
## 4  4.348100 -3.088640  6.140210 -6.708430  6.038530  4.18459 -2.13864 14.368900
## 5  1.259300 -4.584890  8.022080  3.414910 -0.533752  2.07845  3.71373 -3.263110
## 6 -4.395400 -7.833460  0.521059  0.710471 -3.947820 -2.35297  1.21840 -0.378980
##      PC202     PC203    PC204    PC205    PC206     PC207    PC208     PC209
## 1 -3.11112 -4.598450 -1.96079  3.52513 -1.31918 -6.670200 -1.34216 -3.612510
## 2  1.54712  2.344110 -2.94640 -3.13680  7.17757 -0.395078 -5.69215  8.461160
## 3  3.38690  5.952260  1.38994 -5.29183 -4.22535  9.260910  2.98283 -0.764994
## 4  5.31532  0.932129 -3.17659 -2.80713  2.56407  1.306090  2.79659  1.101850
## 5 -2.00416 -0.203443  4.38951  6.97284 -9.70525 -3.697460  6.39656  8.730730
## 6  2.50952 -1.602600 -2.30116  1.19615 -4.62051 -3.716480  6.20565 -5.826400
##       PC210    PC211     PC212      PC213    PC214     PC215     PC216    PC217
## 1 -1.661940 -2.46879  6.710560 -2.8241400  7.22261 -3.770720   5.20565  3.42311
## 2  3.492260 -1.72476 10.872400  0.6363790  3.57742 -3.091950   1.20010  8.41381
## 3 -0.703084  2.14656 -6.903270 -0.0225667 -2.40430  3.660320   1.55056 -1.84901
## 4 -0.832008  3.17770 -2.990440 -4.8824500  4.54524 -0.998691   2.75592 -4.78552
## 5 -8.730790  7.31274 -0.552715  8.4335300 -1.56579 -0.817271 -12.71400 -1.70658
## 6 -8.740840 -5.74159 -1.670020  2.4412600 -3.45297  3.287710   6.58442 -2.43074
##      PC218     PC219     PC220     PC221     PC222     PC223    PC224    PC225
## 1 -3.57521 -0.925743 -6.040000  5.841540 -4.217500  2.565470  1.25886  2.31512
## 2 -3.62146 -8.082930 -9.108710 -9.552030  0.604587  9.647750  1.56354 -2.98224
## 3  5.59194  0.255314  6.716710 -1.586710  1.778830 -2.208140 -3.38602 -1.42084
## 4 -3.74929 -1.932080  2.643500  3.766340  8.106450 -4.411350 -1.80712 -2.66210
## 5 -6.29215 12.235500 -0.865865 -0.144382 -0.940503 -0.838229 -8.17155 -1.57281
## 6  5.48879 -0.402248 -0.760478 -0.933320  2.795250 -5.700500  7.01591 -2.00649
##       PC226     PC227      PC228      PC229     PC230     PC231     PC232
## 1 -5.812820 -0.556436   5.421390   0.973315   4.22507   7.74684 -2.824980
## 2 -8.759420 -5.133450  10.434700   1.852170  -3.21919  -3.28281  3.737270
## 3  5.424080 -2.494230  -0.398656   0.790017   2.30765   5.70697 -2.449790
## 4  1.892400 -2.255280 -10.998000  -4.597690 -15.59370 -10.99590 18.098300
## 5  6.300520  5.356360  -4.260340   1.476340  -9.80437  -3.12196 -4.577790
## 6  0.898544  4.905560  -4.256270 -13.299500  -5.55964  -6.69199  0.910271
##       PC233      PC234      PC235     PC236     PC237    PC238     PC239
## 1  13.31080 -14.730400 -10.598300   5.86821  11.89720  9.15428 15.522100
## 2   4.15302   7.857400   6.537100   2.57194  -2.80241 -5.56853  0.433601
## 3   4.56916  -0.185496 -13.465200   1.58770   2.80303 -6.51858 11.355300
## 4   2.44302  -5.491020  21.092000   8.65642  -3.30788 10.82330  3.974270
## 5 -13.60080   9.396310   0.541495 -11.66070  -1.05810 -4.85984  0.621867
## 6 -11.91010   9.942480   7.015510   2.87224 -13.57450 -8.40561 -5.611740
##       PC240      PC241      PC242    PC243      PC244     PC245    PC246
## 1 -4.644390 -11.122300  -6.166970 -3.32413   6.832570 -11.75250 -7.66241
## 2 12.428600   0.691729  -0.452836  4.99957  12.638600  12.76320 -8.00939
## 3  0.110384  -3.356570   5.381920 -7.69284  19.911200  -9.56644 -5.50014
## 4  1.485160   1.767310 -10.804000 11.09060   0.416645 -12.45470 -8.62727
## 5  2.528690  -4.375780  -0.512906  7.34698   1.382710  -8.82247 -1.21971
## 6 -1.892380  12.843200   4.894520  2.67951 -10.023300   3.72962  2.72864
##        PC247     PC248     PC249      PC250    PC251     PC252     PC253
## 1  -6.937510 -12.83190  1.755680  3.4566200 12.67260  7.218590  -8.65325
## 2 -13.479700   5.93387 12.874600 11.8111000 -3.10706  4.321340   1.47639
## 3 -12.127900  -6.71963 -0.699389  8.0150400 -2.73580 -4.820810   6.34486
## 4  -7.063090 -11.45900 -8.729490 -0.0793172  2.14604 -0.974239  -4.99281
## 5   0.914657   1.14351 -2.689320  0.4449780  2.56107 -3.629580  -8.71877
## 6   0.316696   7.46687 -2.639550  3.7237300  6.38938 13.615600 -15.47010
##      PC254    PC255     PC256      PC257     PC258     PC259     PC260
## 1 -6.25543 -6.73511  6.193890   8.329880 -4.816680  0.541965  3.560440
## 2 -2.23967 -3.84316  4.929060   2.167880 -1.366380 -8.534950 17.023800
## 3 -4.47488  5.17743  5.237630   4.352360  8.581240  4.618530 -6.328750
## 4 -2.15612 -1.94788 -2.493940   5.955880 -8.083960  3.160150  7.550890
## 5 -1.76590  8.92968  0.184374  -0.355971  0.469736  4.693120 -2.885510
## 6 -2.66822 -5.84437 -7.129670 -12.423400  2.449930  5.382430 -0.471878
##       PC261    PC262    PC263    PC264     PC265     PC266     PC267    PC268
## 1 -4.425650  4.27069 -1.25455 -9.34464  5.559410   6.18695  0.462009 -9.02276
## 2 -2.629560  1.50408 -3.30473 -1.35286 -3.996380  -7.36740  1.449120  2.02989
## 3  4.362500 -4.13551 -2.78760  9.91492  3.298040 -16.04700  2.584200 10.84370
## 4 -0.347105 -2.87329 -3.63635 -2.70680  0.360816  -1.46211 -0.878445 -2.89191
## 5 -2.124490  2.83095 -1.44903  9.42488  0.346431  -9.87365 -3.597790 10.50730
## 6 -2.978680 -1.46830 -6.50351  7.48992 -5.964130   7.05490 -6.785280  3.70866
##       PC269     PC270     PC271    PC272     PC273      PC274     PC275
## 1  1.141710 -7.186420  10.57470 -2.68709   3.47205  1.1103100 -0.810875
## 2  1.192420  2.483320   4.30524  6.19651  11.50560 -8.5042200 -1.974170
## 3  1.192010  6.057580   1.61979  7.68589  12.31950 -6.4028600 -1.019940
## 4 -0.576283  0.352734   7.91840  0.83963   3.54216  0.6112570  4.864560
## 5  2.559920  4.747950  -2.79979  8.44632   4.10378  0.0290276 -0.917252
## 6 10.045800 11.496600 -11.87330  4.83425 -14.90620  3.2880400  1.765770
##       PC276     PC277      PC278     PC279     PC280     PC281     PC282
## 1  0.790267  3.214050 -2.5700800 -2.826450 -0.366929  4.122520 -2.259730
## 2  0.966456 -0.283686  0.0998602 -1.628590 -7.814720 -5.775000  2.118510
## 3 -4.030300 -2.866380  7.2347400 -2.698500 -4.185240 -0.281134 -1.627250
## 4 -3.583800  0.187923  0.4278200 -2.382940 -2.748640  0.440407 -5.040810
## 5 -3.616090 -6.134920  7.3913300 -0.984962 -2.159430 -0.970610  0.136633
## 6  1.738120  3.473030 -1.9091000  8.148860  2.158070 -2.777400  3.523600
##       PC283      PC284     PC285     PC286      PC287     PC288    PC289
## 1  3.672340  1.8835700  5.066380 -2.349720  2.4786700  1.407310 -2.68301
## 2 -5.648640  1.1015000 -1.305600  0.533424 -1.2730100  0.831478  1.60040
## 3 -6.075130  1.7544100  0.249563  0.237337 -6.3643400 -2.454020  1.95367
## 4  0.228679  0.5514890  3.395440 -5.114750  0.0399911  0.445819  1.14122
## 5 -5.147220 -0.0582729 -0.038785 -1.173280 -1.9469400  0.726258  1.32271
## 6  0.387900  0.3683700  1.687890  3.454950  2.4010700  2.583450 -2.11218
##       PC290     PC291     PC292      PC293     PC294      PC295     PC296
## 1 -0.753858 -0.542549 -1.997630  0.0354434  1.927740  1.1620000  1.838630
## 2  2.806650  1.957440 -0.168855 -1.2627400 -2.278180 -1.3238000 -1.647870
## 3  1.550630  3.414140  1.327070 -0.4078890 -2.231820 -0.4119370  0.250438
## 4 -3.312840  0.264682 -1.876620  0.4229650  1.819580 -0.0481879 -1.276520
## 5 -2.071300 -0.558831  1.313250  0.5746580  0.503964  0.0674113 -0.216368
## 6 -3.694120  2.995870  0.601978 -1.8855900 -1.583300 -1.1155900  1.513920
##       PC297     PC298    PC299     PC300     PC301     PC302     PC303
## 1 -0.140480 -2.214850 -2.31808 -0.703568 -0.113414 -0.574813 -0.676687
## 2 -0.708318  1.880170  2.63775 -2.554080 -0.555429  0.606084  0.957965
## 3 -0.579125  1.810380  2.46392  1.421180  0.275171  0.218699  0.836659
## 4 -0.157423 -1.078660 -2.84694 -0.782049  1.190840 -0.917747 -2.590050
## 5  1.229620  0.976108  2.50623 -0.649626  0.888099 -0.722353  1.189520
## 6  1.612860 -0.807718  2.68794 -0.276612 -2.457360 -0.800217  0.289320
##       PC304     PC305     PC306     PC307     PC308    PC309      PC310
## 1 -0.277139  1.008660 -0.378657  0.252205  1.480000 0.308352  0.3872220
## 2  0.984959  0.176792 -0.421508 -1.631460 -1.291760 0.393527  1.3237900
## 3  0.837330 -1.237040  0.667640 -0.916607  0.419896 2.058720  0.8124120
## 4 -0.992738  0.699266 -0.405584  0.334573  1.557710 1.619710 -0.0314967
## 5  1.052450  0.405087  2.204360 -1.001040 -0.162834 1.168010  0.9724400
## 6  0.907985 -0.268888  1.328200  0.103274 -0.802521 0.262558 -0.1756110
##       PC311     PC312     PC313      PC314     PC315       PC316      PC317
## 1 -0.889394 -0.255358 -1.340710 -0.3103120 -1.312420  0.76824100 -0.0165334
## 2 -2.533370  0.148725  0.642613 -0.3722790  0.438960 -1.67704000 -1.2035500
## 3 -0.959478 -1.302850  1.367620  0.1548570 -0.390091  0.00345459  2.5224200
## 4  0.226174 -2.424980  1.293800  0.2648420  0.489714 -0.49734800 -1.0871300
## 5  0.335292  0.209168  0.374381  0.0715358 -1.294310 -0.42690300  1.7618000
## 6 -1.175180 -0.360316 -0.409936 -0.3753060  1.115560  0.34592600 -0.1183310
##       PC318      PC319      PC320      PC321      PC322      PC323      PC324
## 1 -0.592226 -0.2698480  0.2902790  0.0689809  0.7612440 -0.0592515  0.0208142
## 2 -4.146340  0.3083650  0.2248010  0.1965100 -1.1406300 -0.2998080  0.1335550
## 3  0.516375  0.0640749  0.7191560 -0.2257290 -0.1598330  0.5081930  0.0913785
## 4 -3.886320  0.0446736 -0.4160680 -0.1636460  0.7368040  0.3300800 -0.3511050
## 5  1.589810 -0.0528384  0.7272740 -0.1102090 -0.0970665  0.2105090 -0.0522031
## 6  2.103760  0.9858290 -0.0967412  0.2292060  0.8340250 -0.5315110  0.0766531
##        PC325      PC326       PC327       PC328       PC329 Individual
## 1  0.1203100  0.1043010  0.16595600  0.11606000 1.78132e-06        282
## 2  0.5015050  0.4163570 -0.16696300 -0.20619300 1.78132e-06        283
## 3  0.1932900  0.2411500 -0.22998900  0.06923640 1.78132e-06        280
## 4  0.0371669  0.0575980  0.21069900  0.14293000 1.78132e-06        279
## 5  0.0995012 -0.0928798  0.38160600 -0.00189713 1.78132e-06        286
## 6 -0.3880720 -0.6782730  0.00931281  0.01990270 1.78132e-06        287
##    Pop_City         Location Latitude Longitude Continent Year          Region
## 1 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018 Southern Europe
## 2 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018 Southern Europe
## 3 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018 Southern Europe
## 4 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018 Southern Europe
## 5 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018 Southern Europe
## 6 Barcelona Barcelona, Spain  41.3851    2.1734    Europe 2018 Southern Europe
##     Subregion order order2 orderold
## 1 West Europe    16      8        8
## 2 West Europe    16      8        8
## 3 West Europe    16      8        8
## 4 West Europe    16      8        8
## 5 West Europe    16      8        8
## 6 West Europe    16      8        8

6.7.2 Create PCA plots for Iberian peninsual + turkey + US + native range

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_US_pc1_pc2_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

PC 1 & PC 3

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_US_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)
chartcolors <- c("#66C2A5", "#c41A1C", "goldenrod", "#146c45", "#2524f9", "#a113b2", "#FF7F00")
pch=15

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_US_pc1_pc3_b.pdf"
  ),
  width  = 8,
  height = 8,
  units  = "in"
)

7. Run LEA for subsets of data in SNP Set 3 (MAF 1%)

7.1 LEA for Italy + native pops for SNP Set 3 (MAF 1%, R2<0.01)

7.1.1 Import the data for SNP Set 3 (R2<0.01 MAF 1%) subset for euro_native_italy_all

genotype <- here(
   "euro_global/output/neuroadmixture/euro_native2_italy_all.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 339
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 339
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG 
##  12  13  12  14  12  16   2  12   4   7  12   4  11   4   5   9  12   2   6  12 
## KAN KAT KLP KUN LAM MAT OKI QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN 
##  11   6   4   4   9  12  12  11   4   9   3  12   6   6   7  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   330
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   330
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
## 
## 
##  - number of detected individuals:   330
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.lfmm"

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

7.1.2 Run LEA for SNP Set 3 (MAF 1% and r2<0.01) for Italy + native pops

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/euro_native2_italy_all.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1","lea_cross_entropy_euro_native2_italy_all_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9284443 0.8883377 0.8814999 0.8742773 0.8714906 0.8685528 0.8669024
## mean 0.9289609 0.8888186 0.8819074 0.8746872 0.8720530 0.8697568 0.8674704
## max  0.9295439 0.8893572 0.8823791 0.8751120 0.8724608 0.8710534 0.8681971
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8663302 0.8656725 0.8647275 0.8655945 0.8643631 0.8656917 0.8661961
## mean 0.8665036 0.8661961 0.8660245 0.8661466 0.8658581 0.8661260 0.8666861
## max  0.8668618 0.8666665 0.8674751 0.8674410 0.8676213 0.8672100 0.8672014
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8669973 0.8678448 0.8697238 0.8712374 0.8722511 0.8742418 0.8777928
## mean 0.8684575 0.8693895 0.8707579 0.8731435 0.8741718 0.8767588 0.8790701
## max  0.8697920 0.8701229 0.8736439 0.8746222 0.8755863 0.8805344 0.8812548
##         K = 22    K = 23    K = 24    K = 25
## min  0.8794246 0.8832928 0.8826137 0.8861262
## mean 0.8824751 0.8852063 0.8854825 0.8883168
## max  0.8848575 0.8913951 0.8885691 0.8898279
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 2 is best for k=10
##          K = 10
## run 1 0.8674751
## run 2 0.8647275
## run 3 0.8673379
## run 4 0.8656414
## run 5 0.8649409
# get the cross-entropy of all runs for K = 5
ce5 = cross.entropy(project, K = 5)
ce5 #run 2 is best for k=5
##           K = 5
## run 1 0.8724608
## run 2 0.8714906
## run 3 0.8722575
## run 4 0.8722302
## run 5 0.8718258
# get the cross-entropy of all runs for K = 7
ce7 = cross.entropy(project, K = 7)
ce7 #run 2 is best for k=7
##           K = 7
## run 1 0.8676905
## run 2 0.8669024
## run 3 0.8675180
## run 4 0.8681971
## run 5 0.8670443

7.1.3 Plots for K=7

color_palette_7 <-
  c(
    "#77DD77",
    "yellow2",
    "magenta", 
    "#75FAFF",
    "orange",
    "#008080",
    "#1E90FF"
     )

7.1.3.1 Mean admixture by country for K=7

using ggplot

best = which.min(cross.entropy(project, K = 7)) #2

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:7)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_7)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "LEA_admixture_by_country _native_italy_k7_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.1.3.2 Plot individual admixtures for K=7

Extract ancestry coefficients for K=7

change to correct matrix

leak7 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.snmf/K7/run2/euro_native2_italy_all_r2.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak7)
## # A tibble: 6 × 7
##         X1       X2    X3       X4       X5       X6       X7
##      <dbl>    <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 0.0657   0.0204   0.585 0.0137   0.0746   0.229    0.0116  
## 2 0.100    0.0460   0.484 0.0435   0.0729   0.239    0.0147  
## 3 0.000100 0.00976  0.986 0.00429  0.000100 0.000100 0.000100
## 4 0.0172   0.000100 0.912 0.0300   0.0407   0.000100 0.000100
## 5 0.0157   0.000100 0.922 0.0412   0.0139   0.00718  0.000100
## 6 0.000100 0.000100 0.999 0.000100 0.000100 0.000100 0.000100

The fam file

fam_file <- here("euro_global/output/neuroadmixture/euro_native2_italy_all.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop          X1          X2       X3         X4          X5          X6
## 1 1001 OKI 0.065675700 0.020350700 0.585107 0.01372930 0.074562700 0.228981000
## 2 1002 OKI 0.099996700 0.046040100 0.484319 0.04348660 0.072893000 0.238553000
## 3 1003 OKI 0.000099964 0.009755190 0.985557 0.00428777 0.000099964 0.000099964
## 4 1004 OKI 0.017164400 0.000099973 0.911757 0.03002970 0.040749400 0.000099973
## 5 1005 OKI 0.015661800 0.000099982 0.921853 0.04116820 0.013933200 0.007183290
## 6 1006 OKI 0.000099950 0.000099950 0.999400 0.00009995 0.000099950 0.000099950
##            X7
## 1 0.011593700
## 2 0.014711700
## 3 0.000099964
## 4 0.000099973
## 5 0.000099982
## 6 0.000099950

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop          v1          v2       v3         v4          v5          v6
## 1 1001 OKI 0.065675700 0.020350700 0.585107 0.01372930 0.074562700 0.228981000
## 2 1002 OKI 0.099996700 0.046040100 0.484319 0.04348660 0.072893000 0.238553000
## 3 1003 OKI 0.000099964 0.009755190 0.985557 0.00428777 0.000099964 0.000099964
## 4 1004 OKI 0.017164400 0.000099973 0.911757 0.03002970 0.040749400 0.000099973
## 5 1005 OKI 0.015661800 0.000099982 0.921853 0.04116820 0.013933200 0.007183290
## 6 1006 OKI 0.000099950 0.000099950 0.999400 0.00009995 0.000099950 0.000099950
##            v7
## 1 0.011593700
## 2 0.014711700
## 3 0.000099964
## 4 0.000099973
## 5 0.000099982
## 6 0.000099950

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:7)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_7) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_k=7_native_italy_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

7.1.4 Plots for K=10

color_palette_10 <-
  c(
    "#00FF00",
    "#FFFF00",
    "#FF0000",
    "#0000FF",
    "orange",
    "purple",
    "#1E90FF",
    "magenta",
    "#75FAFF",
    "#F49AC2"
     )

7.1.4.1 Mean admixture by country for K=10

using ggplot

best = which.min(cross.entropy(project, K = 10)) #2

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:10)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_10)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "LEA_admixture_by_country _native_italy_k10_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.1.4.2 Plot individual admixtures for K=10

Extract ancestry coefficients for k=10

change to correct matrix

leak10 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.snmf/K10/run2/euro_native2_italy_all_r2.10.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak10)
## # A tibble: 6 × 10
##         X1      X2      X3    X4      X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  1.77e-1 1.15e-1 1.00e-4 0.481 2.94e-2 3.71e-2 4.58e-2 1.02e-1 1.10e-2 1.81e-3
## 2  1.63e-1 6.11e-2 1.00e-4 0.358 1.17e-1 7.46e-2 1.53e-2 1.82e-1 1.00e-4 2.89e-2
## 3  9.99e-5 9.99e-5 6.97e-3 0.991 9.99e-5 9.99e-5 1.47e-3 9.99e-5 9.99e-5 9.99e-5
## 4  5.52e-3 3.92e-2 1.00e-4 0.875 1.23e-2 4.73e-2 8.01e-4 1.00e-4 1.00e-4 1.97e-2
## 5  1.76e-3 4.30e-2 1.00e-4 0.895 1.00e-4 4.25e-2 1.00e-4 1.72e-2 1.00e-4 1.00e-4
## 6  9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5

The fam file

fam_file <- here("euro_global/output/neuroadmixture/euro_native2_italy_all.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak10 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak10)

head(leak10)
##    ind pop          X1          X2          X3       X4          X5          X6
## 1 1001 OKI 1.77105e-01 1.14984e-01 9.99910e-05 0.481022 2.93972e-02 3.71193e-02
## 2 1002 OKI 1.62571e-01 6.11419e-02 9.99820e-05 0.358093 1.17088e-01 7.46483e-02
## 3 1003 OKI 9.99370e-05 9.99370e-05 6.96579e-03 0.990862 9.99370e-05 9.99370e-05
## 4 1004 OKI 5.52047e-03 3.91880e-02 9.99730e-05 0.874837 1.22991e-02 4.73223e-02
## 5 1005 OKI 1.76204e-03 4.30026e-02 9.99550e-05 0.895062 9.99550e-05 4.24868e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
##            X7          X8          X9         X10
## 1 4.58028e-02 1.01648e-01 1.10103e-02 1.81238e-03
## 2 1.53470e-02 1.81973e-01 9.99820e-05 2.89379e-02
## 3 1.47223e-03 9.99370e-05 9.99370e-05 9.99370e-05
## 4 8.01401e-04 9.99730e-05 9.99730e-05 1.97314e-02
## 5 9.99550e-05 1.71871e-02 9.99550e-05 9.99550e-05
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05

Rename the columns

# Rename the columns starting from the third one
leak10 <- leak10 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak10)
##    ind pop          v1          v2          v3       v4          v5          v6
## 1 1001 OKI 1.77105e-01 1.14984e-01 9.99910e-05 0.481022 2.93972e-02 3.71193e-02
## 2 1002 OKI 1.62571e-01 6.11419e-02 9.99820e-05 0.358093 1.17088e-01 7.46483e-02
## 3 1003 OKI 9.99370e-05 9.99370e-05 6.96579e-03 0.990862 9.99370e-05 9.99370e-05
## 4 1004 OKI 5.52047e-03 3.91880e-02 9.99730e-05 0.874837 1.22991e-02 4.73223e-02
## 5 1005 OKI 1.76204e-03 4.30026e-02 9.99550e-05 0.895062 9.99550e-05 4.24868e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
##            v7          v8          v9         v10
## 1 4.58028e-02 1.01648e-01 1.10103e-02 1.81238e-03
## 2 1.53470e-02 1.81973e-01 9.99820e-05 2.89379e-02
## 3 1.47223e-03 9.99370e-05 9.99370e-05 9.99370e-05
## 4 8.01401e-04 9.99730e-05 9.99730e-05 1.97314e-02
## 5 9.99550e-05 1.71871e-02 9.99550e-05 9.99550e-05
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak10 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:10)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_10) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_k=10_native_italy_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

Clean env & memory

# Remove all objects from the environment
rm(list = ls())

# Run the garbage collector to free up memory
gc()
##           used  (Mb) gc trigger  (Mb) max used  (Mb)
## Ncells 3233303 172.7    5892838 314.8  5892838 314.8
## Vcells 6299688  48.1   68763439 524.7 84253609 642.9

7.2 LEA for Italy + native + US pops for SNP Set 3 (MAF 1%, R2<0.01)

7.2.1 Import the data for SNP Set 3 subset for italy_all_and_US

genotype <- here(
   "euro_global/output/dapc/dapc_italy_all_and_US.vcf"
  )
d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22642
##   column count: 362
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22642
##   Character matrix gt cols: 362
##   skip: 0
##   nrows: 22642
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22642
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC 
##  12  12  13  12  14  12  16   2  12   4   7  12   4  11   4   5   9  12   2   6 
## KAG KAN KAT KLP KUN LAM MAT OKI PAL QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN 
##  12  11   6   4   4   9  12  12  11  11   4   9   3  12   6   6   7  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   353
##  - number of detected loci:      22642
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   353
##  - number of detected loci:      22642
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
## 
## 
##  - number of detected individuals:   353
##  - number of detected loci:      22642
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.lfmm"

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

7.2.2 Run LEA for SNP Set 3 (MAF 1% and r2<0.01) for Italy + native + US pops (italy_all_and_US)

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/dapc/dapc_italy_all_and_US.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_cross_entropy_italy_all_and_US_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9268937 0.8877699 0.8813283 0.8744758 0.8719716 0.8693805 0.8675017
## mean 0.9272783 0.8880830 0.8816350 0.8749238 0.8730286 0.8702210 0.8682202
## max  0.9275629 0.8884493 0.8819855 0.8755391 0.8748210 0.8711330 0.8695598
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8665617 0.8660492 0.8652353 0.8653178 0.8640970 0.8651959 0.8661665
## mean 0.8677238 0.8665552 0.8661283 0.8660485 0.8656138 0.8663148 0.8668948
## max  0.8697357 0.8671418 0.8668879 0.8671324 0.8669948 0.8691199 0.8673556
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8661263 0.8674308 0.8684976 0.8693216 0.8712749 0.8715545 0.8731175
## mean 0.8672159 0.8684909 0.8692299 0.8704666 0.8721665 0.8745922 0.8754878
## max  0.8686471 0.8702589 0.8700549 0.8714425 0.8733087 0.8779314 0.8794419
##         K = 22    K = 23    K = 24    K = 25
## min  0.8762912 0.8799641 0.8797621 0.8838846
## mean 0.8787773 0.8803735 0.8821449 0.8847680
## max  0.8828910 0.8806781 0.8844514 0.8862852
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 2 is best for k=10
##          K = 10
## run 1 0.8668879
## run 2 0.8652353
## run 3 0.8660570
## run 4 0.8664507
## run 5 0.8660108
# get the cross-entropy of all runs for K = 7
ce7 = cross.entropy(project, K = 7)
ce7 #run 2 is best for k=7
##           K = 7
## run 1 0.8695598
## run 2 0.8675017
## run 3 0.8678661
## run 4 0.8682642
## run 5 0.8679091

7.2.3 Plots for K=7

color_palette_7 <-
  c(
    "#77DD77",
    "yellow2",
    "magenta",
    "#75FAFF",
    "orange",
    "#1E90FF",
    "orangered"
     )

7.2.3.1 Mean admixture by country for K=7

using ggplot

best = which.min(cross.entropy(project, K = 7)) #2

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:7)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_7)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "LEA_admixture_by_country _italy_all_US_k7_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.2.3.2 Plot individiual admixtures for K=7

Extract ancestry coefficients for K=7

change to correct matrix

leak7 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.snmf/K7/run2/dapc_italy_all_and_US_r2.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak7)
## # A tibble: 6 × 7
##      X1       X2       X3       X4       X5       X6       X7
##   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 0.601 0.0661   0.00149  0.0456   0.213    0.0388   0.0345  
## 2 0.482 0.0982   0.0615   0.0976   0.203    0.000100 0.0576  
## 3 0.999 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100
## 4 0.915 0.0145   0.0464   0.0236   0.000100 0.000100 0.000100
## 5 0.927 0.0248   0.0232   0.0244   0.000100 0.000100 0.000100
## 6 0.999 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100

The fam file

fam_file <- here("euro_global/output/dapc/dapc_italy_all_and_US.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop       X1         X2         X3         X4          X5          X6
## 1 1001 OKI 0.600727 0.06607960 0.00149369 0.04558840 0.212836000 0.038797300
## 2 1002 OKI 0.482055 0.09821330 0.06151460 0.09755160 0.203013000 0.000099991
## 3 1003 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.915228 0.01449530 0.04638100 0.02359630 0.000099973 0.000099973
## 5 1005 OKI 0.927280 0.02484630 0.02316850 0.02440550 0.000099973 0.000099973
## 6 1006 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
##            X7
## 1 0.034478100
## 2 0.057552800
## 3 0.000099950
## 4 0.000099973
## 5 0.000099973
## 6 0.000099950

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop       v1         v2         v3         v4          v5          v6
## 1 1001 OKI 0.600727 0.06607960 0.00149369 0.04558840 0.212836000 0.038797300
## 2 1002 OKI 0.482055 0.09821330 0.06151460 0.09755160 0.203013000 0.000099991
## 3 1003 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.915228 0.01449530 0.04638100 0.02359630 0.000099973 0.000099973
## 5 1005 OKI 0.927280 0.02484630 0.02316850 0.02440550 0.000099973 0.000099973
## 6 1006 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
##            v7
## 1 0.034478100
## 2 0.057552800
## 3 0.000099950
## 4 0.000099973
## 5 0.000099973
## 6 0.000099950

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:7)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_7) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_k=7_italy_all_US_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

7.2.4 Plots for K=10

color_palette_10 <-
  c(
    "#1E90FF",
    "#75FAFF",
    "orangered",
    "#77DD77",
    "orange",
    "magenta",
    "yellow2",
    "navy", 
    "#008080", 
    "green4"
     )

7.2.4.1 Mean admixture by country using ggplot

best = which.min(cross.entropy(project, K = 10)) #2

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:10)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=10.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_10)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "LEA_admixture_by_country _italy_all_US_k10_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.2.4.2 Plot individual admixtures for K=10

Extract ancestry coefficients for K=10

change to correct matrix

leak10 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.snmf/K10/run2/dapc_italy_all_and_US_r2.10.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak10)
## # A tibble: 6 × 10
##         X1      X2      X3      X4      X5      X6      X7    X8      X9     X10
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl>   <dbl>
## 1  3.03e-2 3.70e-2 5.68e-2 3.26e-2 1.06e-1 1.37e-3 2.06e-1 0.458 6.21e-2 9.26e-3
## 2  1.00e-4 1.00e-4 1.02e-1 6.74e-2 1.74e-1 1.52e-2 1.60e-1 0.335 1.09e-1 3.62e-2
## 3  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5
## 4  1.00e-4 1.00e-4 1.00e-4 2.34e-2 1.55e-3 5.97e-2 3.82e-2 0.844 3.25e-2 1.00e-4
## 5  1.00e-4 1.40e-3 1.00e-4 1.41e-2 6.60e-3 1.60e-2 2.90e-2 0.907 2.56e-2 1.00e-4
## 6  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5

The fam file

fam_file <- here("euro_global/output/dapc/dapc_italy_all_and_US.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak10 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak10)

head(leak10)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 3.02897e-02 3.70008e-02 5.68101e-02 3.25922e-02 1.06060e-01
## 2 1002 OKI 9.99820e-05 9.99820e-05 1.01823e-01 6.74361e-02 1.74391e-01
## 3 1003 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 9.99640e-05 2.34413e-02 1.54534e-03
## 5 1005 OKI 9.99730e-05 1.40348e-03 9.99730e-05 1.40783e-02 6.59955e-03
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
##            X6          X7       X8          X9         X10
## 1 1.36729e-03 2.06353e-01 0.458153 6.21146e-02 9.25911e-03
## 2 1.52188e-02 1.60348e-01 0.335293 1.09113e-01 3.61762e-02
## 3 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
## 4 5.97380e-02 3.81962e-02 0.844131 3.25481e-02 9.99640e-05
## 5 1.60403e-02 2.89676e-02 0.906966 2.56453e-02 9.99730e-05
## 6 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05

Rename the columns

# Rename the columns starting from the third one
leak10 <- leak10 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak10)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 3.02897e-02 3.70008e-02 5.68101e-02 3.25922e-02 1.06060e-01
## 2 1002 OKI 9.99820e-05 9.99820e-05 1.01823e-01 6.74361e-02 1.74391e-01
## 3 1003 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 9.99640e-05 2.34413e-02 1.54534e-03
## 5 1005 OKI 9.99730e-05 1.40348e-03 9.99730e-05 1.40783e-02 6.59955e-03
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
##            v6          v7       v8          v9         v10
## 1 1.36729e-03 2.06353e-01 0.458153 6.21146e-02 9.25911e-03
## 2 1.52188e-02 1.60348e-01 0.335293 1.09113e-01 3.61762e-02
## 3 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
## 4 5.97380e-02 3.81962e-02 0.844131 3.25481e-02 9.99640e-05
## 5 1.60403e-02 2.89676e-02 0.906966 2.56453e-02 9.99730e-05
## 6 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak10 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:10)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_10) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_k=10_italy_all_US_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

7.3 LEA for Albania, Croatia, Greece + native pops for SNP Set 3 (MAF 1%, R2<0.01 snp set)

7.3.1 Import the data for SNP Set 3 for subset for euro_native2_albania_croatia_greece_US

genotype <- here(
   "euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 323
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 323
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ INW JAF KAC KAG KAN 
##  10  12  12  12  12  12  12   2  11  10  12   4   7  12  11   4   2   6  12  11 
## KAT KLP KUN LAM MAT OKI PAL QNC SON SSK SUF SUU TAI TIR UTS YUN 
##   6   4   4   9  12  12  11  11   3  12   6   6   7   4  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   314
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   314
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
## 
## 
##  - number of detected individuals:   314
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.lfmm"

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

7.3.2 Run LEA for SNP Set 3 (MAF 1% and r2<0.01) for euro_native2_albania_croatia_greece_US

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "albania_croatia_greece", "lea_cross_entropy_albania_croatia_greece_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9233611 0.8893582 0.8820397 0.8755814 0.8720341 0.8713425 0.8702904
## mean 0.9236991 0.8898473 0.8829263 0.8762218 0.8727421 0.8721093 0.8713905
## max  0.9241163 0.8903764 0.8838403 0.8767106 0.8732175 0.8728521 0.8723603
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8696857 0.8693687 0.8690723 0.8705174 0.8707420 0.8725734 0.8725764
## mean 0.8707711 0.8700650 0.8704738 0.8710626 0.8714378 0.8731421 0.8742283
## max  0.8711857 0.8707463 0.8714205 0.8720667 0.8722824 0.8742300 0.8748453
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8741498 0.8765791 0.8777430 0.8804020 0.8840067 0.8862442 0.8867222
## mean 0.8761364 0.8777634 0.8796934 0.8815985 0.8853774 0.8887773 0.8904791
## max  0.8778219 0.8801763 0.8822988 0.8834231 0.8865690 0.8908116 0.8925081
##         K = 22    K = 23    K = 24    K = 25
## min  0.8912113 0.8957265 0.8983535 0.9041044
## mean 0.8923743 0.8978368 0.9009491 0.9050913
## max  0.8929114 0.8993496 0.9025162 0.9064686
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 5 is best for k=10
##          K = 10
## run 1 0.8707718
## run 2 0.8708009
## run 3 0.8703036
## run 4 0.8714205
## run 5 0.8690723
ce7 = cross.entropy(project, K = 7)
ce7 #run 5 is best for k=7
##           K = 7
## run 1 0.8720676
## run 2 0.8723603
## run 3 0.8705620
## run 4 0.8716721
## run 5 0.8702904

7.3.3 Plots for K=7

color_palette_7 <-
  c(
    "#75FAFF",
    "navy",
    "orangered",
    "yellow2",
    "#FFFF99",
    "green",
    "magenta"
     )

7.3.3.1 Mean admixture by country for K=7

using ggplot

best = which.min(cross.entropy(project, K = 7)) #2

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:7)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_7)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "native_italy", "LEA_admixture_by_country _albania_croatia_greece_k7_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.3.3.2 Plot individual admixtures for K=7

Extract ancestry coefficients for K=7

change to correct matrix

leak7 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.snmf/K7/run5/euro_native2_albania_croatia_greece_US_r5.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak7)
## # A tibble: 6 × 7
##         X1       X2       X3       X4       X5       X6    X7
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <dbl>
## 1 0.0749   0.0949   0.127    0.0358   0.0119   0.00637  0.649
## 2 0.124    0.0895   0.137    0.0723   0.0179   0.000100 0.559
## 3 0.000100 0.000100 0.000100 0.000100 0.00434  0.00426  0.991
## 4 0.0257   0.0349   0.0305   0.0166   0.00141  0.00607  0.885
## 5 0.0183   0.000100 0.0578   0.000100 0.000100 0.000100 0.923
## 6 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100 0.999

The fam file

fam_file <- here("euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 0.074938700 0.094872400 0.126692000 0.035842500 0.011863100
## 2 1002 OKI 0.124185000 0.089502600 0.137391000 0.072307900 0.017885400
## 3 1003 OKI 0.000099964 0.000099964 0.000099964 0.000099964 0.004338800
## 4 1004 OKI 0.025675400 0.034944100 0.030518100 0.016572200 0.001405040
## 5 1005 OKI 0.018338000 0.000099964 0.057766800 0.000099964 0.000099964
## 6 1006 OKI 0.000099950 0.000099950 0.000099950 0.000099950 0.000099950
##            X6       X7
## 1 0.006368430 0.649423
## 2 0.000099991 0.558628
## 3 0.004255540 0.991006
## 4 0.006070940 0.884814
## 5 0.000099964 0.923495
## 6 0.000099950 0.999400

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 0.074938700 0.094872400 0.126692000 0.035842500 0.011863100
## 2 1002 OKI 0.124185000 0.089502600 0.137391000 0.072307900 0.017885400
## 3 1003 OKI 0.000099964 0.000099964 0.000099964 0.000099964 0.004338800
## 4 1004 OKI 0.025675400 0.034944100 0.030518100 0.016572200 0.001405040
## 5 1005 OKI 0.018338000 0.000099964 0.057766800 0.000099964 0.000099964
## 6 1006 OKI 0.000099950 0.000099950 0.000099950 0.000099950 0.000099950
##            v6       v7
## 1 0.006368430 0.649423
## 2 0.000099991 0.558628
## 3 0.004255540 0.991006
## 4 0.006070940 0.884814
## 5 0.000099964 0.923495
## 6 0.000099950 0.999400

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:7)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_7) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "albania_croatia_greece", "lea_k=7_albania_croatia_greece_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

7.3.4 Plotw for K=10

color_palette_10 <-
  c(
    "#75FAFF",
    "navy",
    "orangered",
    "yellow2",
    "#FFFF99",
    "green",
    "magenta", 
    "orange",
    "#008080",
    "goldenrod"
     )

7.3.4.1 Mean admixture by country for K=10

using ggplot

best = which.min(cross.entropy(project, K = 10)) #5

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:10)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=10.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_10)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "albania_croatia_greece", "LEA_admixture_by_country _albania_croatia_greece_k10_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.3.4.2 Plot individual admixtures for K=10

Extract ancestry coefficients for k=10

change to correct matrix

leak10 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.snmf/K10/run5/euro_native2_albania_croatia_greece_US_r5.10.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak10)
## # A tibble: 6 × 10
##         X1      X2    X3      X4      X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  2.35e-2 8.97e-2 0.484 4.88e-2 3.95e-2 2.54e-2 6.44e-3 1.26e-1 1.23e-2 1.44e-1
## 2  5.29e-2 8.05e-2 0.328 7.14e-2 7.53e-2 1.00e-4 6.83e-3 1.89e-1 9.41e-3 1.86e-1
## 3  9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4  1.00e-4 1.00e-4 0.847 3.33e-2 1.09e-2 4.57e-2 1.00e-4 5.68e-2 5.79e-3 1.00e-4
## 5  8.97e-3 1.00e-4 0.884 2.42e-2 1.00e-4 1.00e-4 1.00e-4 3.45e-2 1.00e-4 4.74e-2
## 6  9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5

The fam file

fam_file <- here("euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak10 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak10)

head(leak10)
##    ind pop          X1          X2       X3          X4          X5          X6
## 1 1001 OKI 2.35101e-02 8.97365e-02 0.483871 4.88332e-02 3.95461e-02 2.54034e-02
## 2 1002 OKI 5.28657e-02 8.04670e-02 0.328388 7.14030e-02 7.52950e-02 9.99910e-05
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 0.847077 3.33007e-02 1.09430e-02 4.57350e-02
## 5 1005 OKI 8.96517e-03 9.99550e-05 0.884467 2.41564e-02 9.99550e-05 9.99550e-05
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
##            X7          X8          X9         X10
## 1 6.44432e-03 1.26041e-01 1.23340e-02 1.44280e-01
## 2 6.82661e-03 1.89227e-01 9.40816e-03 1.86019e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99640e-05 5.67593e-02 5.78536e-03 9.99640e-05
## 5 9.99550e-05 3.45474e-02 9.99550e-05 4.73644e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05

Rename the columns

# Rename the columns starting from the third one
leak10 <- leak10 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak10)
##    ind pop          v1          v2       v3          v4          v5          v6
## 1 1001 OKI 2.35101e-02 8.97365e-02 0.483871 4.88332e-02 3.95461e-02 2.54034e-02
## 2 1002 OKI 5.28657e-02 8.04670e-02 0.328388 7.14030e-02 7.52950e-02 9.99910e-05
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 0.847077 3.33007e-02 1.09430e-02 4.57350e-02
## 5 1005 OKI 8.96517e-03 9.99550e-05 0.884467 2.41564e-02 9.99550e-05 9.99550e-05
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
##            v7          v8          v9         v10
## 1 6.44432e-03 1.26041e-01 1.23340e-02 1.44280e-01
## 2 6.82661e-03 1.89227e-01 9.40816e-03 1.86019e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99640e-05 5.67593e-02 5.78536e-03 9.99640e-05
## 5 9.99550e-05 3.45474e-02 9.99550e-05 4.73644e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak10 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:10)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_10) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "albania_croatia_greece", "lea_k=10_albania_croatia_greece_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

7.4 LEA for SNP Set 3 for Eastern European pops (MAF 1%, R2<0.01)

7.4.1 Import the data for SNP Set 3 subset for native_far_east_euro2

genotype <- here(
   "euro_global/output/neuroadmixture/native_far_east_euro2.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 347
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 347
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KER KLP 
##  12  10  12  12  12   2  12  12   4   7  12  11   4   2   6  12  11   6  12   4 
## KRA KUN LAM MAT OKI QNC RAR SEV SOC SON SSK SUF SUU TAI TIK UTS YUN 
##  12   4   9  12  12  11  12  12  12   3  12   6   6   7  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   338
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   338
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
## 
## 
##  - number of detected individuals:   338
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.lfmm"

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

7.4.2 Run LEA for SNP Set 3 (MAF 1% and r2<0.01) for far_east_euro

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/native_far_east_euro2.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "eastern_europe", "lea_cross_entropy_eastern_europe_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9217190 0.8821233 0.8717093 0.8645070 0.8610212 0.8604267 0.8590902
## mean 0.9222289 0.8827113 0.8721769 0.8650111 0.8616927 0.8609514 0.8601843
## max  0.9230844 0.8835033 0.8728852 0.8657793 0.8625686 0.8616257 0.8613609
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8589110 0.8585290 0.8581418 0.8579527 0.8583874 0.8592648 0.8591720
## mean 0.8594724 0.8591332 0.8587629 0.8587118 0.8590905 0.8594949 0.8600420
## max  0.8599754 0.8603889 0.8594127 0.8591350 0.8595783 0.8598640 0.8611707
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8596772 0.8612203 0.8625132 0.8631991 0.8659968 0.8663554 0.8683850
## mean 0.8607397 0.8614333 0.8637799 0.8649254 0.8677074 0.8693006 0.8711266
## max  0.8623412 0.8618979 0.8647681 0.8676732 0.8691997 0.8711685 0.8740780
##         K = 22    K = 23    K = 24    K = 25
## min  0.8697842 0.8725891 0.8741687 0.8757722
## mean 0.8735215 0.8756872 0.8763341 0.8776887
## max  0.8754081 0.8779592 0.8797642 0.8809103
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 3 is best for k=10
##          K = 10
## run 1 0.8586326
## run 2 0.8588221
## run 3 0.8581418
## run 4 0.8588052
## run 5 0.8594127
ce6 = cross.entropy(project, K = 6)
ce6 #run 3 is best for k=6
##           K = 6
## run 1 0.8616257
## run 2 0.8611494
## run 3 0.8604267
## run 4 0.8606753
## run 5 0.8608797
ce7 = cross.entropy(project, K = 7)
ce7 #run 4 is best for k=7
##           K = 7
## run 1 0.8603667
## run 2 0.8613609
## run 3 0.8602921
## run 4 0.8590902
## run 5 0.8598116

7.4.3 Plots for K=7

color_palette_7 <-
  c(
    "#75FAFF",
    "navy",
    "orangered",
    "yellow2",
    "#FFFF99",
    "purple",
    "magenta"
     )

7.4.3.1 Mean admixture by country for K=7

using ggplot

best = which.min(cross.entropy(project, K = 7)) #2

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:7)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_7)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "eastern_europe", "LEA_admixture_by_country _far_eastern_europe_k7_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.4.3.2 Plot individual admixture for K=7

Extract ancestry coefficients for k=7

change to correct matrix

leak7 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.snmf/K7/run4/native_far_east_euro2_r4.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak7)
## # A tibble: 6 × 7
##         X1       X2    X3       X4       X5       X6       X7
##      <dbl>    <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 0.0228   0.133    0.570 0.0853   0.0902   0.0324   0.0673  
## 2 0.0793   0.238    0.385 0.139    0.0208   0.0228   0.114   
## 3 0.000100 0.000100 0.999 0.000100 0.000100 0.000100 0.000100
## 4 0.0117   0.0127   0.890 0.0480   0.000100 0.000100 0.0372  
## 5 0.000100 0.0364   0.905 0.0137   0.0305   0.000100 0.0145  
## 6 0.000100 0.000100 0.999 0.000100 0.000100 0.000100 0.000100

The fam file

fam_file <- here("euro_global/output/neuroadmixture/native_far_east_euro2.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop          X1         X2       X3         X4          X5          X6
## 1 1001 OKI 0.022762300 0.13252800 0.569580 0.08532410 0.090171400 0.032372900
## 2 1002 OKI 0.079327200 0.23798600 0.385303 0.13944400 0.020824700 0.022758000
## 3 1003 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.011698200 0.01266560 0.890219 0.04800090 0.000099982 0.000099982
## 5 1005 OKI 0.000099982 0.03638430 0.904681 0.01372770 0.030542900 0.000099982
## 6 1006 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
##           X7
## 1 0.06726200
## 2 0.11435800
## 3 0.00009995
## 4 0.03721680
## 5 0.01446400
## 6 0.00009995

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop          v1         v2       v3         v4          v5          v6
## 1 1001 OKI 0.022762300 0.13252800 0.569580 0.08532410 0.090171400 0.032372900
## 2 1002 OKI 0.079327200 0.23798600 0.385303 0.13944400 0.020824700 0.022758000
## 3 1003 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.011698200 0.01266560 0.890219 0.04800090 0.000099982 0.000099982
## 5 1005 OKI 0.000099982 0.03638430 0.904681 0.01372770 0.030542900 0.000099982
## 6 1006 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
##           v7
## 1 0.06726200
## 2 0.11435800
## 3 0.00009995
## 4 0.03721680
## 5 0.01446400
## 6 0.00009995

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:7)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_7[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_7) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "eastern_europe", "lea_k=7_far_eastern_europe_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf)

7.4.4 Plots for K=6

color_palette_6 <-
  c(
    "#75FAFF",
    "orangered",
    "yellow2",
    "green4",
    "purple",
    "magenta"
     )

7.4.4.1 Mean admixture by country for K=6

using ggplot

best = which.min(cross.entropy(project, K = 6)) #3

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 6, run = best))


# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:6)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_6[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=6.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_6)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "eastern_europe", "LEA_admixture_by_country _far_eastern_europe_k6_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.4.4.2 Plot individual admixtures K=6

Extract ancestry coefficients for K=6

change to correct matrix

leak6 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.snmf/K6/run3/native_far_east_euro2_r3.6.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak6)
## # A tibble: 6 × 6
##         X1       X2       X3       X4       X5    X6
##      <dbl>    <dbl>    <dbl>    <dbl>    <dbl> <dbl>
## 1 0.0884   0.0378   0.104    0.0444   0.0536   0.671
## 2 0.0801   0.0356   0.142    0.0504   0.110    0.583
## 3 0.00626  0.000100 0.000100 0.000100 0.000100 0.993
## 4 0.0231   0.000100 0.0268   0.00188  0.0269   0.921
## 5 0.0367   0.000100 0.0161   0.000100 0.000100 0.947
## 6 0.000100 0.000100 0.000100 0.000100 0.000100 1.00

The fam file

fam_file <- here("euro_global/output/neuroadmixture/native_far_east_euro2.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak6 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak6)

head(leak6)
##    ind pop         X1          X2          X3          X4          X5       X6
## 1 1001 OKI 0.08835020 0.037768300 0.104384000 0.044442500 0.053568700 0.671486
## 2 1002 OKI 0.08007590 0.035595900 0.141756000 0.050354400 0.109508000 0.582710
## 3 1003 OKI 0.00625699 0.000099964 0.000099964 0.000099964 0.000099964 0.993343
## 4 1004 OKI 0.02305960 0.000099991 0.026848700 0.001880900 0.026911500 0.921199
## 5 1005 OKI 0.03666370 0.000099973 0.016109600 0.000099973 0.000099973 0.946927
## 6 1006 OKI 0.00009996 0.000099960 0.000099960 0.000099960 0.000099960 0.999500

Rename the columns

# Rename the columns starting from the third one
leak6 <- leak6 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak6)
##    ind pop         v1          v2          v3          v4          v5       v6
## 1 1001 OKI 0.08835020 0.037768300 0.104384000 0.044442500 0.053568700 0.671486
## 2 1002 OKI 0.08007590 0.035595900 0.141756000 0.050354400 0.109508000 0.582710
## 3 1003 OKI 0.00625699 0.000099964 0.000099964 0.000099964 0.000099964 0.993343
## 4 1004 OKI 0.02305960 0.000099991 0.026848700 0.001880900 0.026911500 0.921199
## 5 1005 OKI 0.03666370 0.000099973 0.016109600 0.000099973 0.000099973 0.946927
## 6 1006 OKI 0.00009996 0.000099960 0.000099960 0.000099960 0.000099960 0.999500

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak6 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:6)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_6[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=6.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_6) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "eastern_europe", "lea_k=6_far_eastern_europe_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.4.5 Plots for K=10

color_palette_10 <-
  c(
    "#75FAFF",
    "navy",
    "orangered",
    "yellow2",
    "#FFFF99",
    "purple",
    "magenta",
    "purple4",
    "green4",
    "#B20CC9"
     )

7.4.5.1 Mean admixture by country for K=10

using ggplot

best = which.min(cross.entropy(project, K = 10)) #3

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:10)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=10.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_10)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "eastern_europe", "LEA_admixture_by_country _far_eastern_europe_k10_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.4.5.2 Plot individual admixtures for K=10

Extract ancestry coefficients for K=10

change to correct matrix

leak10 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.snmf/K10/run3/native_far_east_euro2_r3.10.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak10)
## # A tibble: 6 × 10
##         X1      X2    X3      X4      X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  1.00e-4 5.05e-2 0.523 3.65e-2 1.61e-1 2.27e-2 5.89e-2 1.49e-3 3.57e-2 1.10e-1
## 2  8.99e-2 1.80e-2 0.375 2.92e-2 1.40e-1 5.09e-2 4.42e-2 5.06e-2 2.04e-2 1.82e-1
## 3  9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4  8.98e-3 1.41e-2 0.857 1.00e-4 6.82e-2 1.45e-2 1.00e-4 3.35e-3 1.00e-4 3.39e-2
## 5  1.00e-4 1.28e-2 0.888 1.00e-4 3.48e-2 1.27e-2 1.00e-4 3.45e-3 1.55e-2 3.26e-2
## 6  9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5

The fam file

fam_file <- here("euro_global/output/neuroadmixture/native_far_east_euro2.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak10 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak10)

head(leak10)
##    ind pop          X1          X2       X3          X4          X5          X6
## 1 1001 OKI 9.99910e-05 5.05316e-02 0.522522 3.65307e-02 1.61025e-01 2.27072e-02
## 2 1002 OKI 8.98890e-02 1.79684e-02 0.375147 2.92204e-02 1.40089e-01 5.09065e-02
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 8.97959e-03 1.41323e-02 0.856627 9.99730e-05 6.82032e-02 1.44932e-02
## 5 1005 OKI 9.99730e-05 1.28261e-02 0.887890 9.99730e-05 3.48179e-02 1.26841e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
##            X7          X8          X9         X10
## 1 5.89181e-02 1.49097e-03 3.57039e-02 1.10470e-01
## 2 4.41989e-02 5.06259e-02 2.04483e-02 1.81507e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99730e-05 3.35282e-03 9.99730e-05 3.39115e-02
## 5 9.99730e-05 3.45216e-03 1.54640e-02 3.25658e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05

Rename the columns

# Rename the columns starting from the third one
leak10 <- leak10 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak10)
##    ind pop          v1          v2       v3          v4          v5          v6
## 1 1001 OKI 9.99910e-05 5.05316e-02 0.522522 3.65307e-02 1.61025e-01 2.27072e-02
## 2 1002 OKI 8.98890e-02 1.79684e-02 0.375147 2.92204e-02 1.40089e-01 5.09065e-02
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 8.97959e-03 1.41323e-02 0.856627 9.99730e-05 6.82032e-02 1.44932e-02
## 5 1005 OKI 9.99730e-05 1.28261e-02 0.887890 9.99730e-05 3.48179e-02 1.26841e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
##            v7          v8          v9         v10
## 1 5.89181e-02 1.49097e-03 3.57039e-02 1.10470e-01
## 2 4.41989e-02 5.06259e-02 2.04483e-02 1.81507e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99730e-05 3.35282e-03 9.99730e-05 3.39115e-02
## 5 9.99730e-05 3.45216e-03 1.54640e-02 3.25658e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak10 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:10)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_10[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_10) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "eastern_europe", "lea_k=10_far_eastern_europe_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf)

7.5 LEA for Iberia + Turkey + native + US

echo "BER
PAL
POL
POP
SPB
BAR
SPC
SPM
SPS
TUA
TUH
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_turkey_iberia_US.txt

Turkey+iberia+US

cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_turkey_iberia_US.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/native_turkey_iberia_US2 \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/native_turkey_iberia_US2.log

100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.969253. 22537 variants and 329 people pass filters and QC.

7.5.1 Import the data for SNP Set 3 (r2<0.01 MAF 1%) subset for Iberia, Turkey, US & native

genotype <- here(
   "euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 338
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 338
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BAR BEN BER CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM 
##  12  12  12  12  12   2  12   4   7  12  11   4   2   6  12  11   6   4   4   9 
## MAT OKI PAL POL POP QNC SON SPB SPC SPM SPS SSK SUF SUU TAI TUA TUH UTS YUN 
##  12  12  11   2  12  11   3   8   6   5   8  12   6   6   7   9  12  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   329
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   329
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
## 
## 
##  - number of detected individuals:   329
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.lfmm"

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

7.5.2 Run LEA for SNP Set 2 (MAF 1% and r2<0.01) for native_turkey_iberia_US2

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/native_turkey_iberia_US2.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "iberia_turkey", "lea_cross_entropy_iberia_turkey_US_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9271062 0.8900207 0.8839852 0.8765154 0.8723017 0.8691442 0.8669760
## mean 0.9278533 0.8907388 0.8847615 0.8772518 0.8728473 0.8703742 0.8679693
## max  0.9282251 0.8910604 0.8852040 0.8776148 0.8734732 0.8716343 0.8694265
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8656042 0.8649707 0.8649777 0.8639331 0.8642309 0.8657547 0.8653990
## mean 0.8675944 0.8658563 0.8654312 0.8655895 0.8653379 0.8660151 0.8673528
## max  0.8690673 0.8662602 0.8660101 0.8667388 0.8661961 0.8667623 0.8693168
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8669662 0.8669479 0.8687847 0.8714309 0.8723204 0.8731121 0.8753858
## mean 0.8674553 0.8696696 0.8697166 0.8723978 0.8736355 0.8748723 0.8772996
## max  0.8679922 0.8726039 0.8707179 0.8729233 0.8747222 0.8776899 0.8784752
##         K = 22    K = 23    K = 24    K = 25
## min  0.8789713 0.8811850 0.8838420 0.8849231
## mean 0.8796446 0.8819028 0.8851866 0.8877920
## max  0.8811388 0.8839430 0.8862658 0.8898421
# get the cross-entropy of all runs for K = 11
ce11 = cross.entropy(project, K = 11)
ce11 #run 3 is best for k=11
##          K = 11
## run 1 0.8664886
## run 2 0.8647956
## run 3 0.8639331
## run 4 0.8667388
## run 5 0.8659916

7.5.3 Plots for K=11

color_palette_11 <-
  c(
    "#75FAFF",
    "navy",
    "orangered",
    "yellow2",
    "#FFFF99",
    "blue",
    "#B22222",
    "#F49AC2",
    "#008080", 
    "orange",
    "magenta"
     )

7.5.3.1 admixture by country for K=11

using ggplot

best = which.min(cross.entropy(project, K = 11)) #3

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 11, run = best))

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:11)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_11[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=11.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_11)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "iberia_turkey", "LEA_admixture_by_country _iberia_turkey_US_k11_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.5.3.2 Plot individual admixtures for K=11

Extract ancestry coefficients for k=11

change to correct matrix

leak11 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.snmf/K11/run3/native_turkey_iberia_US2_r3.11.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak11)
## # A tibble: 6 × 11
##         X1      X2    X3      X4      X5      X6      X7      X8      X9     X10
##      <dbl>   <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1  5.84e-2 1.75e-2 0.538 2.48e-2 1.32e-1 4.00e-2 1.48e-1 7.68e-3 3.02e-2 9.80e-4
## 2  1.22e-1 9.77e-2 0.316 1.00e-4 4.79e-2 1.67e-1 1.65e-1 1.28e-2 2.72e-2 3.90e-2
## 3  9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4  1.00e-4 7.00e-2 0.885 1.26e-2 7.66e-3 1.00e-4 1.00e-4 1.00e-4 6.89e-3 1.27e-2
## 5  9.99e-5 6.56e-2 0.911 9.99e-5 9.99e-5 9.99e-5 1.15e-2 9.99e-5 6.70e-3 4.35e-3
## 6  9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 1 more variable: X11 <dbl>

The fam file

fam_file <- here("euro_global/output/neuroadmixture/native_turkey_iberia_US2.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak11 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak11)

head(leak11)
##    ind pop          X1          X2       X3          X4          X5          X6
## 1 1001 OKI 5.84230e-02 1.74758e-02 0.538229 2.48206e-02 1.31607e-01 3.99854e-02
## 2 1002 OKI 1.22436e-01 9.76610e-02 0.316025 9.99910e-05 4.78928e-02 1.67050e-01
## 3 1003 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
## 4 1004 OKI 9.99713e-05 6.99730e-02 0.885188 1.26074e-02 7.65750e-03 9.99713e-05
## 5 1005 OKI 9.99460e-05 6.55608e-02 0.911314 9.99460e-05 9.99460e-05 9.99460e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
##            X7          X8          X9         X10         X11
## 1 1.48209e-01 7.67503e-03 3.02475e-02 9.79923e-04 2.34667e-03
## 2 1.65019e-01 1.27839e-02 2.71506e-02 3.90228e-02 4.85968e-03
## 3 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
## 4 9.99713e-05 9.99713e-05 6.88714e-03 1.27036e-02 4.58366e-03
## 5 1.14668e-02 9.99460e-05 6.70434e-03 4.35455e-03 9.99460e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05

Rename the columns

# Rename the columns starting from the third one
leak11 <- leak11 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak11)
##    ind pop          v1          v2       v3          v4          v5          v6
## 1 1001 OKI 5.84230e-02 1.74758e-02 0.538229 2.48206e-02 1.31607e-01 3.99854e-02
## 2 1002 OKI 1.22436e-01 9.76610e-02 0.316025 9.99910e-05 4.78928e-02 1.67050e-01
## 3 1003 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
## 4 1004 OKI 9.99713e-05 6.99730e-02 0.885188 1.26074e-02 7.65750e-03 9.99713e-05
## 5 1005 OKI 9.99460e-05 6.55608e-02 0.911314 9.99460e-05 9.99460e-05 9.99460e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
##            v7          v8          v9         v10         v11
## 1 1.48209e-01 7.67503e-03 3.02475e-02 9.79923e-04 2.34667e-03
## 2 1.65019e-01 1.27839e-02 2.71506e-02 3.90228e-02 4.85968e-03
## 3 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
## 4 9.99713e-05 9.99713e-05 6.88714e-03 1.27036e-02 4.58366e-03
## 5 1.14668e-02 9.99460e-05 6.70434e-03 4.35455e-03 9.99460e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak11 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:11)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_11[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=11.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_11) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "iberia_turkey", "lea_k=11_native_turkey_iberia_US_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf)

7.5.4 Plots for K=9

color_palette_9 <-
  c(
    "#75FAFF",
    "orangered",
    "yellow2",
    "#FFFF99",
    "blue",
    "#B22222",
    "#F49AC2",
    "magenta",
    "navy"
     )
ce9 = cross.entropy(project, K = 9)
ce9 #3
##           K = 9
## run 1 0.8660490
## run 2 0.8660339
## run 3 0.8649707
## run 4 0.8662602
## run 5 0.8659677

7.5.4.1 Mean admixture by country for K=9

using ggplot

best = which.min(cross.entropy(project, K = 9)) #3

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 9, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:9)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_9[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=9.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_9)

color_palette_9 <-
  c(
    "#F49AC2",
    "#75FAFF",
    "#B22222",
    "blue",
    "yellow2",   
    "green4",  
    "magenta",
    "#FFFF99",
    "orangered")
    
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=9.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_9)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "iberia_turkey", "LEA_admixture_by_country _iberia_turkey_US_k9_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.5.4.2 Plot individual admixtures for K=9

Extract ancestry coefficients for K=9

change to correct matrix

leak9 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.snmf/K9/run3/native_turkey_iberia_US2_r3.9.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak9)
## # A tibble: 6 × 9
##          X1        X2        X3        X4    X5       X6      X7      X8      X9
##       <dbl>     <dbl>     <dbl>     <dbl> <dbl>    <dbl>   <dbl>   <dbl>   <dbl>
## 1 0.0261    0.0150    0.0865    0.142     0.541  1.25e-2 4.78e-2 1.23e-1 5.26e-3
## 2 0.0256    0.0158    0.187     0.202     0.308  5.33e-2 1.09e-1 9.91e-2 1.00e-4
## 3 0.0000999 0.0000999 0.0000999 0.0000999 0.999  9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4 0.000100  0.0110    0.000100  0.0118    0.908  1.50e-2 6.95e-3 4.05e-2 6.96e-3
## 5 0.000100  0.0235    0.000100  0.0296    0.912  5.80e-3 1.00e-4 2.84e-2 1.00e-4
## 6 0.0000999 0.0000999 0.0000999 0.0000999 0.999  9.99e-5 9.99e-5 9.99e-5 9.99e-5

The fam file

fam_file <- here("euro_global/output/neuroadmixture/native_turkey_iberia_US2.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak9 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak9)

head(leak9)
##    ind pop          X1         X2          X3         X4       X5         X6
## 1 1001 OKI 0.026078400 0.01504910 0.086530300 0.14187700 0.541452 0.01254240
## 2 1002 OKI 0.025618700 0.01577310 0.187377000 0.20171500 0.307703 0.05328930
## 3 1003 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
## 4 1004 OKI 0.000099982 0.01102460 0.000099982 0.01177900 0.907593 0.01501460
## 5 1005 OKI 0.000099964 0.02347530 0.000099964 0.02963310 0.912340 0.00580212
## 6 1006 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
##            X7         X8          X9
## 1 0.047774900 0.12343600 0.005260830
## 2 0.109337000 0.09908680 0.000099991
## 3 0.000099930 0.00009993 0.000099930
## 4 0.006948470 0.04048100 0.006959580
## 5 0.000099964 0.02835000 0.000099964
## 6 0.000099930 0.00009993 0.000099930

Rename the columns

# Rename the columns starting from the third one
leak9 <- leak9 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak9)
##    ind pop          v1         v2          v3         v4       v5         v6
## 1 1001 OKI 0.026078400 0.01504910 0.086530300 0.14187700 0.541452 0.01254240
## 2 1002 OKI 0.025618700 0.01577310 0.187377000 0.20171500 0.307703 0.05328930
## 3 1003 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
## 4 1004 OKI 0.000099982 0.01102460 0.000099982 0.01177900 0.907593 0.01501460
## 5 1005 OKI 0.000099964 0.02347530 0.000099964 0.02963310 0.912340 0.00580212
## 6 1006 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
##            v7         v8          v9
## 1 0.047774900 0.12343600 0.005260830
## 2 0.109337000 0.09908680 0.000099991
## 3 0.000099930 0.00009993 0.000099930
## 4 0.006948470 0.04048100 0.006959580
## 5 0.000099964 0.02835000 0.000099964
## 6 0.000099930 0.00009993 0.000099930

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak9 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:9)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_9[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=9.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_9) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "iberia_turkey", "lea_k=9_native_turkey_iberia_US_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.6 LEA for all Eastern European region pops with SNP Set 3 (MAF 1%, R2<0.01)

7.6.1 Import the data for SNP Set 3 (R2<0.01 MAF 1%) subset for native_eastern_europe2

Create new file native_eastern_europe

echo "
SER
BUL
ROS
TUA
TUH
SEV
ALU
KER
KRA
TIK
RAR
SOC
GES
ARM
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_eastern_europe_b.txt

Turkey+iberia+US

cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_eastern_europe_b.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/native_eastern_europe_b \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/native_eastern_europe_b.log

100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.970557. 22537 variants and 384 people pass filters and QC.

genotype <- here(
   "euro_global/output/neuroadmixture/native_eastern_europe_b.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 22537
##   column count: 393
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 22537
##   Character matrix gt cols: 393
##   skip: 0
##   nrows: 22537
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed

Get population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## ALU ARM BEN BUL CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KER 
##  12  10  12  10  12  12   2  12  12   4   7  12  11   4   2   6  12  11   6  12 
## KLP KRA KUN LAM MAT OKI QNC RAR ROS SER SEV SOC SON SSK SUF SUU TAI TIK TUA TUH 
##   4  12   4   9  12  12  11  12  11   4  12  12   3  12   6   6   7  12   9  12 
## UTS YUN 
##  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   384
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   384
##  - number of detected loci:      22537
## 
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.removed file, for more informations.
## 
## 
##  - number of detected individuals:   384
##  - number of detected loci:      22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.lfmm"

Sample data

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80

7.6.2 Run LEA for SNP Set 3 (MAF 1% and r2<0.01) for all eastern europe

We will do 5 repetitions

# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:25,
  project = "new",
  repetitions = 5,
  percentage = 0.25,
  iterations = 500,
  CPU = 10,
  entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/native_eastern_europe_b.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "eastern_europe", "lea_cross_entropy_eastern_europe_all_r01_b.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## png 
##   2
plot(project, col = "blue", pch = 19, cex = 1.2)

Summary of project

check with run is best

summary(project)
## $repetitions
##                       K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy        5     5     5     5     5     5     5     5     5
## without cross-entropy     0     0     0     0     0     0     0     0     0
## total                     5     5     5     5     5     5     5     5     5
##                       K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
##                       K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy         5      5      5      5      5      5      5      5
## without cross-entropy      0      0      0      0      0      0      0      0
## total                      5      5      5      5      5      5      5      5
## 
## $crossEntropy
##          K = 1     K = 2     K = 3     K = 4     K = 5     K = 6     K = 7
## min  0.9263358 0.8900728 0.8784947 0.8723280 0.8692091 0.8675548 0.8667524
## mean 0.9266675 0.8904538 0.8788574 0.8726525 0.8695476 0.8679456 0.8671573
## max  0.9270804 0.8909701 0.8793918 0.8731650 0.8699389 0.8684354 0.8679222
##          K = 8     K = 9    K = 10    K = 11    K = 12    K = 13    K = 14
## min  0.8659163 0.8653125 0.8656195 0.8645445 0.8643073 0.8639835 0.8644666
## mean 0.8665982 0.8659715 0.8660902 0.8648374 0.8650374 0.8654291 0.8656613
## max  0.8674655 0.8666695 0.8664496 0.8650090 0.8658513 0.8666653 0.8663259
##         K = 15    K = 16    K = 17    K = 18    K = 19    K = 20    K = 21
## min  0.8647958 0.8647558 0.8666941 0.8662315 0.8665095 0.8679754 0.8683581
## mean 0.8655229 0.8660045 0.8669962 0.8666929 0.8676454 0.8689586 0.8695316
## max  0.8660169 0.8667583 0.8674466 0.8669997 0.8687801 0.8694516 0.8719143
##         K = 22    K = 23    K = 24    K = 25
## min  0.8691795 0.8695068 0.8709611 0.8722640
## mean 0.8707714 0.8715768 0.8732896 0.8734511
## max  0.8727627 0.8742344 0.8764915 0.8741712
# get the cross-entropy of all runs for K = 11
ce11 = cross.entropy(project, K = 11)
ce11 #run 1 is best for k=11
##          K = 11
## run 1 0.8645445
## run 2 0.8649721
## run 3 0.8650090
## run 4 0.8649443
## run 5 0.8647170

7.6.3 Plots for K=11

color_palette_11 <-
  c(
    "#75FAFF",
    "navy",
    "orangered",
    "yellow2",
    "#FFFF99",
    "purple",
    "magenta",
    "purple4",
    "green4",
    "#B20CC9",
    "orange"
     )

7.6.3.1 Mean admixture by country for K=11

using ggplot

best = which.min(cross.entropy(project, K = 11)) #3

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))

library(reshape2)

# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 11, run = best))

# Create a named vector to map countries to regions

# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here("scripts", "RMarkdowns",
   "analyses", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:11)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_11[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=11.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
  scale_fill_manual(values = color_palette_11)

ggsave(
  here("scripts", "RMarkdowns",
    "output", "euro_global", "lea", "MAF_1", "eastern_europe", "LEA_admixture_by_country _eastern_europe_all_k11_r01_MAF1.pdf"
  ),
  width  = 10,
  height = 7,
  units  = "in"
)

7.6.3.2 Plot individual admixtures for K=11

Extract ancestry coefficients for K=11

change to correct matrix

leak11 <- read_delim(
  here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.snmf/K11/run1/native_eastern_europe_b_r1.11.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
head(leak11)
## # A tibble: 6 × 11
##         X1      X2      X3      X4      X5      X6      X7      X8      X9   X10
##      <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>
## 1  1.00e-4 2.78e-2 2.00e-2 1.01e-1 8.08e-2 7.20e-2 2.86e-2 4.80e-2 2.91e-2 0.496
## 2  3.60e-2 5.85e-3 2.81e-2 1.06e-1 3.51e-2 1.09e-1 2.12e-2 8.85e-2 6.43e-2 0.350
## 3  9.99e-5 9.99e-5 1.23e-2 9.99e-5 1.89e-3 9.99e-5 9.68e-4 9.99e-5 9.99e-5 0.981
## 4  1.68e-2 2.10e-2 1.00e-4 9.30e-4 1.00e-4 7.12e-2 1.00e-4 1.00e-4 1.29e-2 0.872
## 5  1.41e-2 1.00e-4 3.81e-3 1.77e-2 1.00e-4 4.01e-2 1.00e-4 3.13e-3 9.52e-3 0.911
## 6  9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999
## # ℹ 1 more variable: X11 <dbl>

The fam file

fam_file <- here("euro_global/output/neuroadmixture/native_eastern_europe_b.fam")

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create column ID

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to the matrix

leak11 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak11)

head(leak11)
##    ind pop          X1          X2          X3          X4          X5
## 1 1001 OKI 9.99910e-05 2.77788e-02 2.00482e-02 1.00739e-01 8.07674e-02
## 2 1002 OKI 3.60497e-02 5.85426e-03 2.80621e-02 1.05538e-01 3.50887e-02
## 3 1003 OKI 9.99460e-05 9.99460e-05 1.23075e-02 9.99460e-05 1.88513e-03
## 4 1004 OKI 1.67591e-02 2.10264e-02 9.99640e-05 9.29501e-04 9.99640e-05
## 5 1005 OKI 1.40587e-02 9.99640e-05 3.80564e-03 1.76831e-02 9.99640e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
##            X6          X7          X8          X9      X10         X11
## 1 7.20425e-02 2.86226e-02 4.80016e-02 2.91051e-02 0.496439 9.63562e-02
## 2 1.09040e-01 2.11568e-02 8.84652e-02 6.42642e-02 0.350395 1.56085e-01
## 3 9.99460e-05 9.68483e-04 9.99460e-05 9.99460e-05 0.981302 2.93705e-03
## 4 7.12489e-02 9.99640e-05 9.99640e-05 1.28659e-02 0.871708 5.06209e-03
## 5 4.01411e-02 9.99640e-05 3.13253e-03 9.52133e-03 0.911258 9.99640e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 0.999001 9.99101e-05

Rename the columns

# Rename the columns starting from the third one
leak11 <- leak11 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak11)
##    ind pop          v1          v2          v3          v4          v5
## 1 1001 OKI 9.99910e-05 2.77788e-02 2.00482e-02 1.00739e-01 8.07674e-02
## 2 1002 OKI 3.60497e-02 5.85426e-03 2.80621e-02 1.05538e-01 3.50887e-02
## 3 1003 OKI 9.99460e-05 9.99460e-05 1.23075e-02 9.99460e-05 1.88513e-03
## 4 1004 OKI 1.67591e-02 2.10264e-02 9.99640e-05 9.29501e-04 9.99640e-05
## 5 1005 OKI 1.40587e-02 9.99640e-05 3.80564e-03 1.76831e-02 9.99640e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
##            v6          v7          v8          v9      v10         v11
## 1 7.20425e-02 2.86226e-02 4.80016e-02 2.91051e-02 0.496439 9.63562e-02
## 2 1.09040e-01 2.11568e-02 8.84652e-02 6.42642e-02 0.350395 1.56085e-01
## 3 9.99460e-05 9.68483e-04 9.99460e-05 9.99460e-05 0.981302 2.93705e-03
## 4 7.12489e-02 9.99640e-05 9.99640e-05 1.28659e-02 0.871708 5.06209e-03
## 5 4.01411e-02 9.99640e-05 3.13253e-03 9.52133e-03 0.911258 9.99640e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 0.999001 9.99101e-05

Import Sample Locations

sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
##       Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1   Berlin, NJ     USA 39.79081  -74.9291  Americas          BER 2018
## 2 Columbus, OH     USA 39.97170  -82.9071  Americas          COL 2015
## 3   Palm Beach     USA 26.70560  -80.0364  Americas          PAL 2018
## 4  Houston, TX     USA 29.75491  -95.3505  Americas          HOU 2018
## 5  Los Angeles     USA 34.05220 -118.2437  Americas          LOS 2018
## 6   Manaus, AM  Brazil -3.09161  -60.0325  Americas          MAU 2017
##          Region Subregion order order2 orderold
## 1 North America               1     NA       75
## 2 North America               2     NA       76
## 3 North America               3     NA       77
## 4 North America               4     NA       78
## 5 North America               5     NA       79
## 6 South America               6     NA       80
source(
  here("scripts", "RMarkdowns",
    "my_theme3.R"
  )
)

# Melt the data frame for plotting
Q_melted <- leak11 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(order, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Country = unique(Q_ordered$Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Country, function(rc)
    max(which(Q_ordered$Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 

# Generate all potential variable names
all_variables <- paste0("v", 1:11)


# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = color_palette_11[1:length(all_variables)])

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=11.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_11) +
  expand_limits(y = c(0, 1.5))

color_palette_11 <-
  c(
    "purple",
    "orangered",
    "purple4",
    "#FFFF99",    
    "chocolate4",
    "navy",
    "green4",
    "magenta",
    "orange",
    "#75FAFF",
    "yellow2"
     )

 ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=11.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette_11) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "eastern_europe", "lea_k=11_eastern_europe_all_MAF1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf)