## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
Check data - we created vcf files with LD pruning r2<0.01 (SNP Set 1) and r2<0.1 (SNP Set 2) and r2<0.01 and MAF>1% (SNP Set 3) after QC
## euro_global/output/snps_sets/neutral.vcf
## euro_global/output/snps_sets/r2_0.01.vcf
## euro_global/output/snps_sets/r2_0.01_b.vcf
## euro_global/output/snps_sets/r2_0.1.vcf
## euro_global/output/snps_sets/r2_0.1_b.vcf
Import the data for SNP Set 2 (r2<0.1)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 56384
## column count: 697
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 56384
## Character matrix gt cols: 697
## skip: 0
## nrows: 56384
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant: 56384
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS GEL GES GRA GRC GRV
## 10 12 12 10 12 12 12 13 10 12 14 12 12 16 12 2 12 11 10 12
## HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG KAN KAT KER KLP KRA KUN LAM
## 12 4 7 12 4 11 4 5 9 12 2 6 12 11 6 12 4 12 4 9
## MAL MAT OKI PAL POL POP QNC RAR REC ROM ROS SER SEV SIC SLO SOC SON SPB SPC SPM
## 12 12 12 11 2 12 11 12 11 4 11 4 12 9 12 12 3 8 6 5
## SPS SSK STS SUF SUU TAI TIK TIR TRE TUA TUH UTS YUN
## 8 12 12 6 6 7 12 4 12 9 12 12 9
Convert format
##
## - number of detected individuals: 688
## - number of detected loci: 56384
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.geno"
##
## - number of detected individuals: 688
## - number of detected loci: 56384
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.removed file, for more informations.
##
##
## - number of detected individuals: 688
## - number of detected loci: 56384
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.lfmm"
PCA for SNP Set 2 (r2<0.1)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 688
## -L (number of loci) 56384
## -K (number of principal components) 688
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/
## pca result directory: r2_0.1.pca/
## input file: r2_0.1.lfmm
## eigenvalue file: r2_0.1.eigenvalues
## eigenvector file: r2_0.1.eigenvectors
## standard deviation file: r2_0.1.sdev
## projection file: r2_0.1.projections
## pcaProject file: r2_0.1.pcaProject
## number of individuals: 688
## number of loci: 56384
## number of principal components: 688
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 688
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.pca/r2_0.1.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_all.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation
## 1 Franceville Gabon -1.59207 13.53242 Africa GAB
## 2 Antananarivo Madagascar -18.87920 47.50790 Africa ANT
## 3 Diego ville Madagascar -12.27361 49.29372 Africa DGV
## 4 Morondava Madagascar -20.28420 44.27940 Africa MAD
## 5 Vohimasy Madagascar -22.81591 47.75026 Africa VOH
## 6 Dauguet Mauritius -20.18530 57.52154 Africa DAU
## Year Region Subregion order
## 1 2015 Central Africa Africa 72
## 2 2022 East Africa East Africa 76
## 3 2022 East Africa East Africa 77
## 4 2016 East Africa East Africa 78
## 5 2016 & 2017 East Africa East Africa 79
## 6 2022 Indian Ocean Indian Ocean 80
More sample data
# import sample attributes
samples2 <- read.delim(
here("scripts", "RMarkdowns",
"output", "Population_meta_data.txt"
),
head = TRUE
)
samples2<- samples2 |>
dplyr::select(
region, pop
)
# check head of the file
head(samples2)
## region pop
## 1 Central Africa GAB
## 2 East Africa ANT
## 3 East Africa MAD
## 4 East Africa DGV
## 5 East Africa VOH
## 6 Indian Ocean DAU
Merge with sampling_loc
## pop region Pop_City Country Latitude Longitude
## 1 AIZ East Asia Aizuwakamatsu City Japan 37.49240 139.99360
## 2 ALD Southern Europe Durres Albania 41.29704 19.50373
## 3 ALU Eastern Europe Alushta Ukraine 44.68289 34.40368
## 4 ALV Southern Europe Vlore Albania 40.46600 19.48970
## 5 ANT East Africa Antananarivo Madagascar -18.87920 47.50790
## 6 ARM Eastern Europe Ijevan Armenia 40.87971 45.14764
## Continent Year Region Subregion order
## 1 Asia 2008 East Asia NA
## 2 Europe 2018 Southern Europe East Europe 25
## 3 Europe 2021 Eastern Europe East Europe 35
## 4 Europe 2020 Southern Europe East Europe 24
## 5 Africa 2022 East Africa East Africa 76
## 6 Europe 2020 Eastern Europe East Europe 42
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
## [1] 73
Check the regions
## [1] "East Asia" "Southern Europe" "Eastern Europe" "East Africa"
## [5] "West Africa" "North America" "South Asia" "Caribbean"
## [9] "Southeast Asia" "Indian Ocean" "Western Europe" "Central Africa"
## [13] "South America" "North Africa"
Merge
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 ALD 15.2256 6.46126 32.1280 3.23808 -49.7953 -10.92670 -0.572034
## 2 ALD 13.7685 5.90307 35.0021 3.82902 -57.4622 -10.96090 2.392940
## 3 ALD 14.1156 5.15973 41.4119 4.86405 -44.7047 -13.55490 -1.567570
## 4 ALD 14.0351 4.63880 37.2412 2.50662 -51.3634 -9.20733 6.339370
## 5 ALD 13.7638 7.77443 29.0741 1.91275 -46.4292 -7.44999 6.612820
## 6 ALD 11.7649 4.68709 36.5414 2.42992 -53.2824 -15.04960 4.084150
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 -14.30210 -2.22894 3.28104 -0.260834 -6.495890 1.12102 1.481720 -1.4254100
## 2 -17.34370 -4.73847 2.72773 -4.790710 -6.313720 10.88700 1.135290 -1.9179500
## 3 -21.48710 -5.15265 3.90904 -5.802810 -11.831500 6.07002 -0.741001 2.3593900
## 4 -12.26110 -7.07453 3.17415 0.719609 4.085840 2.68096 1.818100 -1.0343100
## 5 -7.38579 -4.23032 3.51418 -2.237670 0.521779 -4.26046 1.549950 -0.0944897
## 6 -12.77440 -1.34912 3.70964 -4.860350 -8.905270 4.72987 -1.311110 -4.1860700
## PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 -3.404840 -6.681380 2.26284 -3.934980 9.94505 -2.59977 -8.6026000
## 2 0.850241 -11.734100 4.24561 -3.817430 11.08090 -8.02989 -4.9901200
## 3 -3.358110 -9.281240 4.13695 -6.140290 3.20650 -7.80397 0.0856546
## 4 0.848314 0.500187 1.92693 -0.991450 -4.81889 1.79756 -0.2692420
## 5 -3.502980 -10.684700 1.66666 -0.583166 12.22450 1.32066 -11.0036000
## 6 -0.362751 -7.160270 2.79407 -1.804800 6.41620 -4.73082 -6.8650400
## PC23 PC24 PC25 PC26 PC27 PC28 PC29
## 1 -0.393056 0.142389 -0.508575 -9.88046 -17.25000 4.67932 0.258118
## 2 -4.072270 3.701780 -1.030180 -9.99636 -13.44080 2.03050 -3.606230
## 3 -0.983792 1.240310 -1.573270 -10.22980 -16.59200 4.56860 -6.373030
## 4 0.865494 3.112280 3.126080 -8.86689 2.55136 7.30868 0.245592
## 5 0.793089 -0.259153 -8.438140 -6.71741 -14.32400 -24.81810 -4.207450
## 6 -0.533011 1.521040 -5.311990 -13.53110 -10.86650 6.41190 7.641670
## PC30 PC31 PC32 PC33 PC34 PC35 PC36 PC37
## 1 6.032530 12.34690 -1.306060 -3.39428 -8.958020 -1.565250 -6.81837 13.14430
## 2 3.346470 12.51080 2.294700 -1.19521 -4.538200 0.256068 -7.48108 11.47800
## 3 4.385860 17.39720 2.733380 -9.22178 -6.485740 4.242090 -4.12368 15.03180
## 4 0.401121 -5.34980 5.408590 1.90772 0.521117 0.751213 -2.01571 -6.56855
## 5 4.365140 -4.09776 6.179930 -22.42640 1.867930 1.421670 -6.04500 11.81370
## 6 6.832900 4.53687 -0.433561 -6.12641 -8.780930 -1.704770 -1.79774 1.42995
## PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 1 4.437370 14.26960 -0.773884 -0.763226 -0.384879 4.53096 14.802200
## 2 0.792022 10.56230 3.527150 -4.092060 6.732950 3.39944 6.985610
## 3 -4.418860 7.86127 1.617910 -9.194360 8.817040 -1.81030 1.179980
## 4 -12.985200 9.25693 0.816089 -2.778960 10.981600 -7.97712 0.613751
## 5 -2.263540 2.18789 -1.435380 3.200060 -4.626190 10.26960 9.016560
## 6 15.989700 3.46830 -12.421200 2.007890 -20.046400 -1.68250 15.598900
## PC45 PC46 PC47 PC48 PC49 PC50 PC51
## 1 8.95944 13.522100 22.170400 9.4360900 -2.945040 -4.63524 15.27060
## 2 7.10364 6.176390 17.104800 1.1119500 -1.499020 -4.81574 1.96486
## 3 3.03013 0.344237 0.809553 0.0702351 0.986384 -8.20198 2.71953
## 4 5.64426 10.530100 -0.936301 0.5337370 -3.987930 -10.01840 -2.18238
## 5 -8.49261 9.256920 9.657540 1.1076700 -0.835895 2.51008 6.75000
## 6 -9.02604 9.009420 6.946320 -8.9872100 5.886990 -1.55644 -6.45180
## PC52 PC53 PC54 PC55 PC56 PC57 PC58
## 1 0.491098 -4.79928 1.73700 -10.735800 12.205200 -21.28840 -9.170540
## 2 -0.913348 -2.60682 6.27043 -7.785170 5.292690 -10.72860 -9.560770
## 3 -0.709451 -11.31390 12.27440 -1.729650 12.321900 -9.22542 -11.273500
## 4 2.514070 -1.36192 -6.23275 -4.475910 -0.171674 -2.56913 0.962375
## 5 1.430250 1.47281 -0.65328 -5.467850 2.789630 -10.37030 0.178506
## 6 -1.936830 11.55600 -2.18732 -0.172528 -7.732820 -4.36260 6.071840
## PC59 PC60 PC61 PC62 PC63 PC64 PC65
## 1 0.558107 -14.42350 -2.720380 2.669140 -9.918800 0.468175 -9.58663
## 2 -4.820260 -6.67264 -9.335350 -0.198429 -10.951600 -0.773179 -2.79265
## 3 -2.944490 -9.15708 0.578892 4.932590 -15.587500 -6.857900 -9.70347
## 4 14.683600 10.29440 -4.486420 -0.741527 7.001920 8.943210 -11.48830
## 5 1.023870 -3.04103 8.526610 2.262430 -7.140300 -2.183480 -5.95505
## 6 -12.300800 4.77015 -4.122340 3.441300 -0.637977 -2.148020 50.38250
## PC66 PC67 PC68 PC69 PC70 PC71 PC72
## 1 -2.58420 -0.707528 3.20538 1.28916 -8.62617 4.75803 0.982763
## 2 -4.68237 -7.628080 10.04620 -2.02789 -4.81989 12.59320 1.944160
## 3 -4.18801 3.127010 11.53110 -1.50220 1.88179 5.00590 0.751111
## 4 -1.46146 4.356940 7.68418 -3.44514 -12.42100 16.30550 7.590790
## 5 -9.59028 -8.745600 -6.94160 -5.41227 1.03817 5.74930 -4.315440
## 6 4.37345 -1.813220 -19.65520 20.37500 8.47092 -13.32410 -36.429000
## PC73 PC74 PC75 PC76 PC77 PC78 PC79
## 1 -2.218060 -15.72030 2.642970 -3.800860 -7.739280 -3.681390 -0.233420
## 2 -7.196370 -18.20340 7.807600 -6.785510 -4.169990 -2.282600 -2.871490
## 3 -10.343400 -18.68270 4.092350 -3.039770 4.008260 -9.653350 -0.661432
## 4 -1.635540 -2.51278 0.353208 3.140340 -6.819570 -6.359990 5.004960
## 5 0.244696 -8.40124 0.959723 -0.287991 0.817661 0.987632 0.917362
## 6 6.689240 8.66856 -4.062010 -4.625740 24.070400 27.470100 -5.363180
## PC80 PC81 PC82 PC83 PC84 PC85 PC86
## 1 -8.457150 4.238230 0.284226 -7.43818 3.389350 3.588980 -2.165900
## 2 -1.705140 6.921080 -9.030500 -6.39152 3.472570 6.366870 -0.645937
## 3 3.247060 0.761202 3.098860 -5.12717 2.775320 -0.441982 2.288010
## 4 0.287495 9.662640 -0.948469 -6.76181 0.592148 13.483200 2.898440
## 5 -2.224410 10.164700 1.511980 -8.35391 -11.221100 1.500370 5.899000
## 6 13.818800 -22.609100 -4.698610 -8.70657 7.638730 -9.129480 -3.919290
## PC87 PC88 PC89 PC90 PC91 PC92 PC93
## 1 -10.13950 -0.816244 2.637640 1.3776600 0.664003 -1.815120 1.028400
## 2 -20.45190 -2.119820 2.393490 1.8052400 7.129360 -4.744720 -0.763207
## 3 -14.74440 3.848620 -0.800062 -3.6824600 13.921400 6.337180 -2.308600
## 4 -7.58353 -2.590720 1.165250 0.1242860 13.625700 -0.817992 5.490910
## 5 -10.41860 3.268430 2.752260 0.0702075 2.144920 0.522905 7.698490
## 6 -1.16883 8.544870 2.945770 2.9031300 3.885100 10.645900 -12.801200
## PC94 PC95 PC96 PC97 PC98 PC99 PC100
## 1 -1.313660 -3.84598 2.908900 -5.2236200 -0.6353610 -9.08645 0.0143651
## 2 4.477040 -8.25974 -5.574690 -6.7365000 3.3960500 1.89099 0.2139130
## 3 6.370910 -12.22940 -12.203400 -16.7572000 6.5597700 16.19800 -3.2267400
## 4 -0.565558 7.25605 0.919801 -0.0930397 -1.5488300 11.60540 -9.1673800
## 5 -3.914470 -3.02074 -2.898510 -2.4957400 0.0482717 -11.08500 0.2298290
## 6 -3.018100 2.50314 10.226600 -5.7286700 -6.5308400 7.15800 -7.6628900
## PC101 PC102 PC103 PC104 PC105 PC106 PC107
## 1 1.07133 3.98173 -3.252210 -1.717680 -0.3923890 1.24816 -1.745100
## 2 -3.20132 -1.04195 2.741610 -9.328820 0.0417841 -4.15877 -6.262400
## 3 -6.14705 -7.25484 -3.150010 -0.826528 -8.1842200 -1.88390 -7.674850
## 4 3.11123 -2.33031 7.844560 0.250554 8.6788100 -0.19136 -11.530400
## 5 7.85554 -11.68920 -0.605669 -15.411900 -12.3652000 -1.02618 0.500824
## 6 4.89323 2.52768 11.499600 9.711950 1.5494400 6.71054 5.783070
## PC108 PC109 PC110 PC111 PC112 PC113 PC114 PC115
## 1 -3.02524 7.668930 -9.20340 -2.680370 11.728700 -2.738550 -4.23346 -7.32950
## 2 -7.51654 -9.780450 -2.64763 2.322780 5.607590 1.928910 3.46397 -5.31802
## 3 -8.86584 -0.808406 -4.31540 2.530450 8.258080 -8.850950 3.81741 -9.67504
## 4 -1.69105 9.377090 -17.90270 0.103756 -7.836550 11.537600 4.05930 5.59087
## 5 -8.16365 -5.208530 3.11312 -7.513210 -0.893163 11.616800 -2.47028 -3.20549
## 6 2.57702 -5.396150 -1.46467 1.333950 21.906200 -0.656812 9.92918 6.62916
## PC116 PC117 PC118 PC119 PC120 PC121 PC122
## 1 8.41180 11.058500 6.67989 1.88865 -0.432606 8.66429 -5.089880
## 2 0.59735 -2.957340 2.90396 6.49746 -5.256190 -4.13262 6.717650
## 3 7.16965 -6.832780 3.62204 8.14030 -11.674200 -6.20002 1.717490
## 4 2.13644 7.638760 6.57390 -2.40779 9.523130 -1.45848 -0.960588
## 5 -10.54530 2.178960 6.33640 1.27484 0.707920 10.08110 -16.120300
## 6 6.08900 0.943839 -5.06997 -1.30717 7.344950 9.33311 12.082600
## PC123 PC124 PC125 PC126 PC127 PC128 PC129 PC130
## 1 11.67940 2.14637 -3.450520 -0.674869 -3.01949 3.052020 15.97350 -8.120310
## 2 -2.41782 9.74802 2.167960 3.126920 -4.26479 7.256710 4.17745 -1.201090
## 3 -1.37718 3.03760 -9.648980 -1.442530 -5.67998 7.961680 4.01674 0.536839
## 4 -9.92076 -1.12548 -5.045300 9.174390 21.63250 0.461474 -0.70567 1.585860
## 5 -11.70200 1.53643 0.405001 -0.100278 11.13440 7.322360 4.60827 7.355260
## 6 9.61344 12.68280 1.568950 5.626010 1.69849 2.825770 12.97740 -11.241600
## PC131 PC132 PC133 PC134 PC135 PC136 PC137
## 1 10.77360 2.952130 8.7015300 -2.42737 -0.582769 3.10618 4.76369
## 2 -2.21396 -2.593830 4.0699800 1.83944 -6.421540 3.45549 -1.54275
## 3 3.26855 -0.112469 -0.0764857 -2.07947 -13.663100 6.12741 -6.32643
## 4 -11.72950 7.815740 -4.2015800 6.19167 -8.332150 13.88750 7.01573
## 5 6.90981 2.368750 -4.9287900 1.34625 3.943220 -7.33402 -3.76745
## 6 2.19118 -3.157010 4.5153700 2.51769 2.475630 1.62286 -4.65162
## PC138 PC139 PC140 PC141 PC142 PC143 PC144
## 1 3.78628 2.43580 -3.898380 15.94210 0.271981 3.40412 4.385640
## 2 -1.96172 10.27880 1.724300 2.74736 -11.145200 -8.40939 -0.408985
## 3 -11.85200 13.43260 0.191675 -6.99586 -10.731600 -3.43938 8.032100
## 4 7.61277 4.17395 5.126150 -6.57744 4.364710 -1.70990 -11.446700
## 5 3.41059 -6.55824 -9.269600 5.37878 7.505510 16.98780 1.765800
## 6 9.52479 2.93301 -0.312517 5.30163 8.506600 -7.73302 -0.153048
## PC145 PC146 PC147 PC148 PC149 PC150 PC151
## 1 3.39213 -1.026560 7.01833 -1.0968900 -8.80704 -0.0733263 -2.46216
## 2 -6.72700 -3.507730 11.98460 -7.2196100 -2.80295 0.6421910 -6.50993
## 3 1.57037 6.979750 16.47200 -2.6465900 -3.19842 -4.6261600 0.47703
## 4 -6.37204 2.036010 -11.67280 0.0580746 -10.16150 -10.7923000 3.38652
## 5 -10.21940 -0.431477 10.71800 -3.9486700 3.81882 7.7359800 -6.13244
## 6 -7.02991 1.704610 -4.70034 -0.4173480 8.43710 -8.6166900 8.41247
## PC152 PC153 PC154 PC155 PC156 PC157 PC158
## 1 -1.3818500 -0.0710997 7.569430 5.360200 -2.02716 3.710420 -6.03236
## 2 -6.8053600 8.8637900 0.218977 4.339880 -1.73716 -0.195435 -1.00440
## 3 -5.8156800 -3.2029200 5.156680 6.502330 4.85750 3.231060 3.45481
## 4 -0.0339514 -0.3514830 -8.126020 -0.765441 -9.24012 -0.911206 6.01267
## 5 -0.6752500 -0.3826510 2.898030 -6.756070 5.65613 10.711500 -1.46959
## 6 -3.4409200 -7.8875400 -1.906630 11.560300 8.24444 1.703990 -15.19740
## PC159 PC160 PC161 PC162 PC163 PC164 PC165 PC166
## 1 8.57231 4.286060 -6.22192 6.412460 -3.436300 3.124610 -2.38407 -0.0743992
## 2 6.85697 -2.107580 -5.03154 1.970930 0.515066 5.202650 -5.59314 0.9786730
## 3 -1.84488 -0.618533 -4.09814 0.739014 5.391380 6.302800 10.75090 2.6092000
## 4 -6.31869 -1.206350 4.71626 3.385330 3.454830 3.780330 -2.61666 -9.5811300
## 5 -4.75707 9.420320 -7.47857 -6.745560 1.925900 -6.273810 4.48416 -7.5999400
## 6 -3.95660 0.290584 3.61267 -2.875150 -4.373350 0.846221 -2.02132 3.3961300
## PC167 PC168 PC169 PC170 PC171 PC172 PC173 PC174
## 1 -3.37632 -8.65957 8.410160 -1.28420 -4.957490 -9.86513 -7.99086 -0.902778
## 2 -1.62580 -6.29061 4.311840 -9.20611 -0.261691 -6.60737 -6.71567 -1.582530
## 3 -1.99270 -1.97154 -5.766560 -7.14754 9.291180 -1.47950 2.87915 -8.384660
## 4 -9.68854 -4.86442 -3.395170 -4.30684 -5.989000 -6.77772 1.53557 -2.498750
## 5 -5.66323 4.53276 0.309445 2.92760 -10.052000 2.31203 12.53380 1.830930
## 6 2.03946 -4.51190 4.294950 7.86477 -0.462650 2.75597 3.93608 1.126650
## PC175 PC176 PC177 PC178 PC179 PC180 PC181 PC182
## 1 -3.76400 -0.535447 -11.07330 1.76248 0.624051 -2.89583 -2.42015 2.514120
## 2 -7.16571 -7.016380 2.08440 6.67386 -5.112840 1.51242 -1.64664 3.097170
## 3 -3.61787 5.369630 5.24604 -3.06494 3.809300 -1.91191 3.39554 9.510150
## 4 -9.15582 5.580740 -11.41580 6.42779 5.493620 3.79652 -9.30633 -4.461860
## 5 -2.85959 2.125850 4.97769 7.12612 -2.058650 7.41671 7.69743 -0.983422
## 6 -15.13650 2.047540 -2.34772 1.04986 -2.748390 -5.47255 1.12465 4.138490
## PC183 PC184 PC185 PC186 PC187 PC188 PC189
## 1 -5.995520 -11.90190 0.866295 -7.605300 0.7782150 -8.63817 7.25269
## 2 -1.345180 -2.32974 -9.487760 -0.410367 -6.7688300 6.15010 -10.13820
## 3 -0.146179 6.86151 -4.465230 -2.973670 -0.0564168 15.59260 -8.86422
## 4 -8.010760 -2.18257 10.499300 2.970940 2.1763600 2.85204 1.51159
## 5 -7.857970 -10.62900 20.013000 3.828900 -17.6671000 5.95325 9.98656
## 6 1.313790 -5.94180 1.723900 2.865960 0.2075390 -4.55138 -3.59848
## PC190 PC191 PC192 PC193 PC194 PC195 PC196 PC197
## 1 1.63709 -5.045830 -3.2185700 0.417710 -3.36301 4.92474 10.121400 -5.99975
## 2 2.72385 -2.836020 2.1050300 -2.721190 -5.30938 -3.41458 6.458500 -1.13680
## 3 -2.87082 2.858950 7.6057100 -0.772679 6.37692 1.80985 2.838930 3.87239
## 4 18.64120 -0.809942 -1.6737800 3.432980 4.89215 -2.16179 -3.820910 8.56546
## 5 9.40218 13.214300 6.3401900 0.982574 -13.26430 -7.43342 2.377820 -3.68807
## 6 1.79455 5.003800 -0.0460385 -0.700963 1.55132 -2.21679 -0.427266 -5.59028
## PC198 PC199 PC200 PC201 PC202 PC203 PC204 PC205
## 1 -8.329010 3.084970 6.06393 1.58537 3.45384 6.476630 4.77097 4.91583
## 2 2.413100 -4.290930 1.50770 -8.43390 -3.40258 1.262370 1.21918 5.12172
## 3 3.328860 0.627403 -9.22015 2.60473 -4.28864 0.765437 -8.21244 -2.89425
## 4 -0.950113 0.750771 11.32810 -6.16751 1.05991 -3.229290 6.46602 -1.44482
## 5 3.329510 -8.639140 9.98636 -5.50307 9.32961 1.189780 -6.69372 1.88490
## 6 1.358300 1.322150 2.11656 -4.30790 4.25234 1.863890 -3.49724 -1.83016
## PC206 PC207 PC208 PC209 PC210 PC211 PC212
## 1 -3.48199 -5.29132 -0.0542595 2.504250 -1.55845 2.46892 5.970370
## 2 -6.61383 -6.74388 -4.1538400 -1.301270 2.29149 6.68704 4.780920
## 3 3.86647 -6.52522 -9.4877500 -2.383810 -1.46861 3.97522 -0.705368
## 4 -2.74389 -1.18694 5.0295400 -3.094740 -9.47931 -15.12050 -2.059670
## 5 -10.29620 2.54475 -16.3906000 -9.532990 -10.69040 -1.68951 -2.927220
## 6 -6.07409 4.54792 0.4110090 0.404948 -3.72848 -3.42834 -5.187530
## PC213 PC214 PC215 PC216 PC217 PC218 PC219
## 1 -2.42070 6.48670 -3.973550 -4.7721600 4.660840 -2.60235 -14.653100
## 2 -4.30141 -2.98673 -8.709530 2.0847900 -12.326900 -1.48172 -13.695600
## 3 9.00043 3.57021 -2.005140 -1.7206000 5.750570 2.90068 0.209276
## 4 -2.48908 -8.69769 -4.212810 6.6498400 -11.340100 6.09299 1.147360
## 5 1.05449 -14.73440 8.896820 -2.1816200 0.569268 3.43031 -7.049560
## 6 -6.58644 2.32351 0.870668 -0.0806979 -1.284990 3.12712 4.691680
## PC220 PC221 PC222 PC223 PC224 PC225 PC226 PC227
## 1 5.32424 -1.81868 3.822470 -0.275304 2.907750 3.842280 -8.259580 -3.42201
## 2 -2.74035 9.94432 0.453123 4.848000 3.556840 1.961970 -4.038620 -4.46039
## 3 4.13684 4.42842 -0.313577 3.765110 4.595700 4.061720 0.162896 10.20030
## 4 -7.18844 -8.60654 -1.823810 -2.210670 0.117209 3.811730 -0.562794 -5.90821
## 5 5.25829 2.68741 -2.751040 10.365400 1.267740 0.663535 -2.298980 -2.41475
## 6 1.30701 3.64990 6.313110 -0.895773 0.706628 -2.402200 2.331180 -3.94659
## PC228 PC229 PC230 PC231 PC232 PC233 PC234 PC235
## 1 -5.75789 2.82847 -6.05264 3.202620 2.764870 3.180620 0.844975 -1.732440
## 2 2.37753 -1.27006 -6.63598 -0.862593 -4.765190 2.720740 -3.293410 2.199580
## 3 -2.42518 -1.59755 3.92775 5.014570 0.416147 -0.379688 -2.750140 -0.428346
## 4 2.98621 1.76162 -12.24470 0.691565 -4.574500 4.282660 6.595130 10.849900
## 5 -1.38333 -1.52500 -2.46723 -1.924180 4.370950 5.731540 -7.233430 -1.737960
## 6 1.18406 -1.30812 2.80719 -1.418090 -1.958860 5.774980 -1.396080 3.763940
## PC236 PC237 PC238 PC239 PC240 PC241 PC242
## 1 1.50830 0.0188626 -3.241340 -2.088240 -0.145485 0.442058 -3.695300
## 2 -1.39437 -4.1411800 1.810060 0.452486 3.708480 -1.143840 -1.412840
## 3 2.13987 -1.3277700 0.214955 -2.610540 0.547971 -1.712250 2.423510
## 4 4.37107 -1.8006500 4.993960 4.129340 4.174540 0.943437 -0.612052
## 5 3.19083 4.6464400 -0.664133 1.854380 -10.472100 0.237792 6.324880
## 6 -2.99110 1.9328700 6.188620 -4.508480 7.227320 -5.453790 -3.312970
## PC243 PC244 PC245 PC246 PC247 PC248 PC249
## 1 -0.633572 -10.757000 10.408100 -2.937290 5.34183 -0.889481 9.070440
## 2 3.575110 0.545916 7.352760 -0.644789 4.83957 1.543980 -1.261820
## 3 -1.275890 -3.528040 -0.899813 -1.176770 -1.89888 -2.911280 -4.947130
## 4 1.588890 1.716130 -6.014590 -7.631690 2.20297 4.488490 0.172609
## 5 3.029960 1.477930 -1.065330 -0.907359 -7.25785 -1.496780 -6.771940
## 6 0.850354 -3.366880 -2.951220 -6.438890 2.56175 -1.645320 1.155130
## PC250 PC251 PC252 PC253 PC254 PC255 PC256
## 1 5.645780 2.775420 5.36163 3.207840 -5.556680 1.906810 3.29164
## 2 3.980440 -0.792701 3.09064 -1.012240 6.586170 7.407260 -4.00832
## 3 -4.385010 2.395610 3.62373 0.312608 -0.915628 -6.904650 -3.04102
## 4 2.945810 -2.617730 -3.31902 6.399660 -3.338150 8.499030 3.04215
## 5 -5.018150 -6.278210 -1.69965 -4.143880 4.157770 5.141980 5.47467
## 6 0.286806 -1.717780 -7.71334 1.654840 5.777410 0.100914 3.99665
## PC257 PC258 PC259 PC260 PC261 PC262 PC263
## 1 1.908170 -0.346729 -2.068380 -0.454116 2.8275300 -4.2795000 2.68967
## 2 -2.836050 -5.521990 2.869430 1.831010 -0.8361630 0.6834880 4.36687
## 3 3.341050 1.700230 -1.203630 8.805090 -1.1615000 -0.0520601 -11.08250
## 4 -0.223479 -4.666480 -1.459280 -3.822090 0.0320726 6.3173300 6.13062
## 5 -13.373000 2.784190 -0.888151 3.354070 2.4945800 1.0092500 6.92787
## 6 -1.723720 -5.078500 -0.808831 1.395500 -2.6842600 0.9101180 2.47404
## PC264 PC265 PC266 PC267 PC268 PC269 PC270
## 1 -3.896970 -7.424880 -2.61903 -1.80304 -1.432350 0.302514 4.886840
## 2 8.623130 9.048980 -12.18990 2.79658 -2.591140 5.860040 2.154690
## 3 -2.877490 -3.986740 3.98942 -1.88482 -0.159315 0.690177 2.619790
## 4 -4.059760 9.016520 -7.47800 3.57368 -9.125700 -3.264310 8.668520
## 5 -0.308475 -8.934990 5.28209 7.24398 0.887045 -3.539820 -1.098510
## 6 -0.105430 0.872292 4.34957 -3.08185 2.435040 1.939330 0.502166
## PC271 PC272 PC273 PC274 PC275 PC276 PC277
## 1 7.881900 6.471360 -0.2928420 -2.40316 1.5879000 7.22920 -5.93816
## 2 -0.698949 1.328250 -4.8536200 -6.92171 -2.1934300 -3.14084 -4.78602
## 3 -0.594129 -7.201850 -5.2439200 2.54604 -0.0110208 -6.46526 1.95366
## 4 0.632097 2.090430 4.6021000 4.15350 -2.5705000 -1.12302 1.11706
## 5 -1.950900 2.576340 5.5259800 -4.47285 2.6125900 3.35120 4.27327
## 6 -5.470910 -0.504452 -0.0128675 -2.92769 -3.1319900 3.44109 -2.47637
## PC278 PC279 PC280 PC281 PC282 PC283 PC284
## 1 4.099100 -7.48571 -0.167417 -4.87263 3.045790 4.1147900 -9.146650
## 2 -7.241290 -1.50634 5.497440 -1.74617 0.592121 1.3182200 0.345064
## 3 -2.509380 5.66482 10.094600 3.69168 8.531050 -1.6317600 2.625820
## 4 -9.687600 -6.44805 -8.565540 -7.51890 1.679170 9.6135700 -0.327262
## 5 0.582061 2.59476 3.509210 -5.19840 -5.738880 0.0458189 1.256320
## 6 0.557862 2.20280 1.134320 0.56882 -0.633496 -3.2924300 2.158030
## PC285 PC286 PC287 PC288 PC289 PC290 PC291
## 1 0.454188 4.174470 0.237223 0.863101 -1.555120 5.878520 5.212820
## 2 4.232350 3.455010 4.458980 -4.171710 -1.518360 -0.932421 -4.366360
## 3 0.999888 -4.805370 7.659070 6.602280 3.226320 3.734250 -1.617640
## 4 0.308426 4.786080 4.799290 3.504770 0.738245 -0.682937 4.971720
## 5 15.276800 1.583850 -1.758480 -7.976630 5.585800 -4.134900 -6.646940
## 6 4.556510 0.369715 -1.702920 -2.950710 -2.221540 7.283740 -0.979947
## PC292 PC293 PC294 PC295 PC296 PC297 PC298
## 1 3.2347100 -1.511000 8.81482 3.072580 5.3515800 -0.379514 -5.616130
## 2 0.0841457 1.997390 1.07355 2.846150 -1.1478300 -6.357120 -4.880070
## 3 -1.2987100 2.867530 -15.14560 4.212820 0.0752962 -1.180580 3.794550
## 4 0.5199970 3.782840 12.99830 -0.107392 -2.0496400 -3.060520 -7.168100
## 5 4.6260600 8.120040 5.14818 -5.493950 4.1595300 3.369220 6.329190
## 6 -1.6966700 0.487841 1.53241 -1.820870 -2.1325100 4.722500 0.398797
## PC299 PC300 PC301 PC302 PC303 PC304 PC305 PC306
## 1 1.861230 3.74775 -2.460380 -2.775260 1.41265 -2.971920 2.068410 1.36384
## 2 6.735010 -8.45645 1.775710 -3.849100 3.05681 -9.581560 -5.319420 -8.04805
## 3 -0.694424 1.22018 -7.041780 2.304910 -1.24076 -0.331240 2.131310 -5.59945
## 4 2.861800 -8.04751 0.431195 0.353142 2.29742 -2.193920 6.040010 8.14027
## 5 -2.040470 1.27581 5.150070 0.908976 6.11362 0.956813 4.235160 2.17671
## 6 -1.420090 -3.30534 4.593290 -4.022630 -1.87184 -2.147600 -0.095247 5.30675
## PC307 PC308 PC309 PC310 PC311 PC312 PC313
## 1 2.666430 -8.91234000 6.413650 4.85648 -1.01690 -12.27880 10.88790
## 2 0.779983 -0.26894600 -2.635360 1.02629 6.27035 1.44025 -1.29713
## 3 -2.735310 2.14209000 -0.543058 -10.04880 -3.71257 -6.62850 -8.11757
## 4 2.625690 -0.00487239 3.818260 -1.98443 3.03673 -7.61543 -1.92937
## 5 -2.550460 5.10258000 9.863550 5.33091 -2.23226 1.08442 7.03003
## 6 3.398430 -3.44865000 -1.505560 4.73401 5.69713 1.02146 1.17421
## PC314 PC315 PC316 PC317 PC318 PC319 PC320 PC321
## 1 6.27147 -7.87005 -2.19640 3.75274 -2.35320 -0.154202 -4.74634 6.04989
## 2 1.98465 -2.75595 1.02746 -5.13690 -5.21263 -4.834630 2.48781 2.85248
## 3 6.46946 4.07161 5.10265 1.22482 -3.74938 2.692430 3.66822 -5.38208
## 4 -4.51037 -1.79956 -2.52217 4.26051 7.77052 -6.219070 -9.51949 6.37174
## 5 1.34634 2.01760 -1.35013 -1.03651 10.55990 -6.811470 -2.31631 -1.99656
## 6 -1.90534 1.38766 1.74802 3.24850 -3.13560 1.777040 1.93866 -4.47499
## PC322 PC323 PC324 PC325 PC326 PC327 PC328 PC329
## 1 -9.10108 3.549700 -5.680010 4.48874 1.181740 -3.97591 1.257650 5.58334
## 2 -7.64026 0.230071 0.342849 7.77410 -4.722510 -2.35469 -0.361317 -2.08259
## 3 1.01563 -1.167460 0.789843 -10.34660 -0.977359 7.06295 0.830032 -1.26368
## 4 3.48818 3.681180 4.046320 9.38505 4.554350 0.47823 4.633000 -3.47130
## 5 2.99746 -3.148820 -2.146860 -8.20774 -0.570330 -4.22945 3.846020 -1.66290
## 6 3.75454 -1.264480 0.651504 3.85269 0.445843 1.18858 -4.492820 1.95275
## PC330 PC331 PC332 PC333 PC334 PC335 PC336
## 1 4.725620 0.689088 3.824080 0.1544140 10.934100 1.960290 4.24537
## 2 4.529400 5.010020 0.113042 -2.6587500 1.395130 0.909263 4.34849
## 3 0.296213 -6.934480 -4.635410 0.0876124 -4.181550 5.729710 -8.11063
## 4 3.086830 -7.170510 -1.702540 0.3362310 -1.141530 -9.095320 8.75012
## 5 11.527500 -0.808551 3.888850 -0.8690900 5.919560 3.828840 -6.15706
## 6 -4.084990 4.078120 0.842066 1.1584300 -0.949974 -2.559420 -4.09158
## PC337 PC338 PC339 PC340 PC341 PC342 PC343 PC344
## 1 6.19540 1.945560 -1.28862 -6.573360 -4.13726 -8.009540 -5.145880 -6.836020
## 2 -2.72169 3.171410 1.48908 11.547600 -6.78609 -0.150857 -0.738223 -3.725380
## 3 -8.39591 2.084090 1.21955 -5.032480 -2.83132 -0.963568 -4.870210 6.383300
## 4 0.75686 -9.706950 10.48390 0.494418 9.77365 3.238740 5.351960 -10.035500
## 5 12.14440 0.260172 -3.43320 4.804190 8.37955 -1.097120 0.981563 3.782490
## 6 3.22115 0.742774 -1.06034 -3.422170 1.74194 -0.811232 -1.507280 0.945183
## PC345 PC346 PC347 PC348 PC349 PC350 PC351 PC352
## 1 15.642800 5.91846 7.50982 -2.67570 15.83610 -5.4801600 4.11919 -1.7759500
## 2 0.491086 -2.19816 2.25648 -12.88000 -1.21171 5.9196000 2.78346 -0.7777890
## 3 0.731040 5.25730 -4.80865 8.05605 -6.00373 2.1341400 5.14207 8.5583400
## 4 6.410900 -4.66821 3.41355 -1.43975 10.43480 3.3980600 -9.29563 -5.0634000
## 5 -0.942191 3.12428 2.47403 7.10411 5.81264 -0.4196320 -5.08933 -0.0112671
## 6 -1.250920 2.68590 2.13484 -2.36689 2.07554 -0.0453647 -1.24654 0.7466450
## PC353 PC354 PC355 PC356 PC357 PC358 PC359
## 1 -6.271610 -3.6575200 -2.02014 5.899610 8.67169 11.37450 4.8255700
## 2 1.483500 -8.3701500 5.59213 2.834920 -5.95256 -1.84332 6.2469700
## 3 -0.886421 3.0260800 1.24496 -1.768100 -2.88226 -1.34889 -5.7470200
## 4 3.287070 0.0440818 -2.25638 2.248700 -3.60181 2.44536 9.8320600
## 5 0.541913 -9.3910500 -11.30110 -0.308222 10.47090 12.93630 5.2654400
## 6 1.377450 2.8188000 1.07941 -4.605620 -3.07809 1.60296 0.0516436
## PC360 PC361 PC362 PC363 PC364 PC365 PC366 PC367
## 1 4.97528 1.92816 3.26170 -5.13914 8.99605 5.77021 -0.8794830 1.29226
## 2 15.77110 -4.78012 4.94689 1.43904 9.70702 5.01752 3.8053200 -10.43710
## 3 0.52344 -4.92525 -7.49090 7.74957 -2.71925 1.01535 -3.0264500 1.40407
## 4 -1.16671 3.95125 1.05050 -4.05431 3.17385 9.23509 -1.8440800 -1.28007
## 5 3.62068 1.77314 6.63181 -14.03620 -1.75186 -6.48401 -2.4602700 4.02054
## 6 -5.54325 -1.22803 -4.00229 2.64747 -3.82953 -3.51300 -0.0722511 -1.08497
## PC368 PC369 PC370 PC371 PC372 PC373 PC374 PC375
## 1 5.05176 -2.280400 -1.28818 -1.98936 2.25610 -10.4267000 -8.52831 -4.14355
## 2 -9.02535 3.330870 4.90899 -3.33300 -2.10591 4.8554800 -1.97702 5.05386
## 3 8.59315 3.874120 -2.73169 4.81824 -2.00050 1.2452200 8.20590 -9.49484
## 4 -4.29816 -0.900339 5.13210 2.73667 -5.40409 1.0662300 5.28201 -2.18340
## 5 -2.78301 -10.314800 -1.20401 9.62003 7.94152 4.7686100 2.80937 -11.35100
## 6 -3.88405 1.491120 4.43727 -2.91086 -3.67505 0.0924878 4.06388 -3.60469
## PC376 PC377 PC378 PC379 PC380 PC381 PC382 PC383
## 1 1.93287 -3.791200 -6.18594 -4.41144 7.48194 22.09360 4.31477000 5.48440
## 2 13.54250 9.281260 -9.10043 10.85510 9.88682 3.00223 -0.00964726 5.88384
## 3 -2.38920 -0.871791 3.70292 -1.97158 -7.64226 4.68540 -5.15182000 2.47221
## 4 6.10118 -4.475620 2.28105 4.29631 4.57648 1.05555 -1.32495000 11.20620
## 5 2.11811 3.496590 9.99898 3.49704 -6.31428 -12.32150 1.98178000 -3.65927
## 6 -2.60094 -3.825030 -3.28517 3.45940 -1.60285 -2.71120 1.30714000 4.17026
## PC384 PC385 PC386 PC387 PC388 PC389 PC390 PC391
## 1 -5.28089 -5.80310 -10.29710 -2.42917 3.15763 2.783010 -3.184410 2.931380
## 2 10.68720 8.17930 2.16115 6.30984 -11.06560 5.941180 -2.447950 5.736400
## 3 -9.00153 -7.83689 -3.41300 5.76060 -2.89051 3.687680 6.391530 -5.684660
## 4 6.98265 5.46019 7.93793 3.34408 -3.52210 0.375499 5.429060 -0.179978
## 5 -2.23224 -1.23719 2.70152 -7.85225 15.14510 0.535925 2.373610 -6.279400
## 6 2.07067 -0.71867 2.62505 2.06858 -4.47926 3.610480 0.586538 -3.261380
## PC392 PC393 PC394 PC395 PC396 PC397 PC398 PC399
## 1 1.62689 -1.12392 -5.635980 2.17522 -9.73348 -3.600210 6.35699 -0.612908
## 2 2.18451 9.80940 -4.209880 7.96108 -3.24413 0.656293 -1.18129 -2.038950
## 3 1.33877 2.12779 3.777860 -2.75396 2.24012 -6.166620 5.39365 -4.671400
## 4 1.81005 -1.17723 -1.805410 -9.10131 6.23361 2.355480 -2.08915 1.329180
## 5 1.70402 -1.28832 -0.523205 -15.77560 -1.26890 -9.754110 -10.30850 -0.843023
## 6 -1.26336 5.35840 0.470259 -2.92586 5.59011 5.356410 1.32668 -0.179859
## PC400 PC401 PC402 PC403 PC404 PC405 PC406
## 1 -5.80481 4.66318 3.906940 -3.77582 10.973500 -13.75900 -9.040080
## 2 -13.22000 7.09513 12.945200 -6.56272 8.047690 -2.30640 4.000400
## 3 -4.02758 4.53056 2.418540 6.50730 -0.782111 -1.88766 1.221530
## 4 13.66800 -10.39270 1.209900 6.06472 5.519690 -2.19619 -10.185100
## 5 5.65307 2.39825 -0.964182 2.68066 -6.448430 6.36762 2.408300
## 6 -3.10339 0.56562 -0.322496 3.43305 -3.003290 1.60209 0.681346
## PC407 PC408 PC409 PC410 PC411 PC412 PC413
## 1 -10.267300 -15.47170 -12.94810 14.20050 -6.03461 -6.497190 -7.971070
## 2 -5.511570 -1.86883 1.98164 0.48436 1.67525 7.565890 3.094270
## 3 10.794200 -4.99562 -4.91824 -1.54396 10.81960 3.548870 3.991310
## 4 -0.661768 11.45950 -4.36517 -7.25156 -5.90374 3.113560 0.613933
## 5 4.552980 -1.58269 3.86753 11.88360 -14.28830 1.956870 9.093050
## 6 1.607460 3.66859 3.26148 -3.45277 4.53780 0.879847 -0.969275
## PC414 PC415 PC416 PC417 PC418 PC419 PC420
## 1 6.745260 7.786240 2.539430 7.386510 0.796349 -8.530330 1.31143
## 2 -0.573863 1.438410 10.799800 -4.000170 2.457460 -2.701190 -14.11400
## 3 -4.907460 -0.238353 10.316300 1.994040 -4.105150 -5.300350 3.60273
## 4 1.147570 5.171590 -2.661040 -1.330040 -4.218330 4.052830 -2.29086
## 5 0.644487 2.719560 -2.868220 -8.205280 8.984360 9.502400 11.44930
## 6 0.498546 -0.795544 0.731469 0.284111 -0.973679 0.605435 3.13568
## PC421 PC422 PC423 PC424 PC425 PC426 PC427 PC428
## 1 5.932930 10.28400 3.00428 3.491630 -1.241720 0.756074 -3.69073 4.940750
## 2 6.267310 -6.59150 -11.83540 1.877340 3.315520 -1.095360 -9.36767 -1.251710
## 3 -1.315130 1.71957 3.83480 -0.542492 1.522560 1.758520 -3.09330 1.202350
## 4 10.180800 -5.05624 2.72519 0.814774 3.640540 -0.199441 -5.78517 11.372400
## 5 -2.604610 -6.54816 7.74349 -5.643030 -6.290190 7.583280 1.61067 -4.844120
## 6 0.461433 -2.24463 2.94083 2.804820 0.806128 2.214190 1.94373 -0.398188
## PC429 PC430 PC431 PC432 PC433 PC434 PC435
## 1 4.50463 9.871870 0.863604 -6.77771 0.367013 1.44034 1.084970
## 2 -7.48562 -7.558070 -7.248220 3.82590 3.491890 -0.77650 -9.518170
## 3 -9.53804 8.274840 5.279000 -2.58229 -2.891920 -1.22442 8.871370
## 4 -2.46774 7.618920 3.777050 6.55248 -7.665490 8.40682 -0.231761
## 5 -3.63503 -1.699960 -7.177160 8.83438 -4.696010 7.11644 0.771812
## 6 -4.81922 -0.120496 -0.321027 -1.81872 1.108510 -0.46094 0.710387
## PC436 PC437 PC438 PC439 PC440 PC441 PC442
## 1 1.88670000 7.91132 0.673881 6.1244400 -9.399960 -5.818550 11.57570
## 2 0.00992825 -2.80371 -4.358480 -6.3011400 7.145170 6.002420 -6.78048
## 3 -6.09916000 -7.93747 2.272930 0.0476828 0.137623 -0.773906 1.89659
## 4 -1.07342000 11.85260 7.378460 -4.0792400 0.583603 -8.627960 5.53160
## 5 0.97579100 -1.60118 -11.032900 4.8823600 3.963900 -0.874974 8.81791
## 6 1.83387000 -3.83216 -2.395240 1.7101300 0.200175 -3.708030 -2.63624
## PC443 PC444 PC445 PC446 PC447 PC448 PC449
## 1 -4.8587400 2.08689 4.766590 5.933090 -7.266390 -12.715800 -1.758400
## 2 1.3783900 -12.17750 -0.816036 0.797621 -1.594840 -0.147152 0.436357
## 3 6.5496200 -2.87627 -3.995170 4.005200 -2.612150 -2.000990 6.135440
## 4 -8.6033800 8.32401 1.399960 -6.030020 -9.848930 8.411860 0.306252
## 5 2.3463600 6.74193 7.527560 -3.834920 0.196744 -7.705480 -5.067370
## 6 0.0120264 1.95481 2.120640 -0.525637 -3.309470 0.620707 0.632064
## PC450 PC451 PC452 PC453 PC454 PC455 PC456
## 1 -11.689200 -4.605370 9.33549 -7.24484 -4.4733200 1.07824 8.2099200
## 2 -10.858200 -11.098400 6.48050 -7.65478 5.4590500 -3.44983 7.1966500
## 3 -3.026670 -2.304850 2.88004 -7.15739 -4.8599300 -2.40202 -3.7875900
## 4 0.819696 0.791209 -2.08938 1.07110 -2.8286500 -6.30264 0.4335760
## 5 13.739400 5.115580 -2.93432 -3.58422 -4.0064800 4.25010 1.1255600
## 6 0.293074 -1.371050 5.19409 -3.59776 0.0670635 1.01360 0.0164702
## PC457 PC458 PC459 PC460 PC461 PC462 PC463 PC464
## 1 9.60658 5.973530 7.29880 10.782200 -5.00370 -1.42603 5.32504 -1.93291
## 2 -4.23453 -1.238000 4.32960 -10.681800 -2.41898 5.43427 2.42697 9.20443
## 3 1.93164 -0.403249 -1.60155 -9.853220 8.67881 -3.66926 2.95450 6.17350
## 4 1.91621 -5.543340 -11.11750 0.815227 -4.21960 2.59885 -2.49242 8.08769
## 5 -5.60659 -4.465390 -2.15803 -0.138856 -7.58051 1.66757 3.00453 -4.46139
## 6 -1.81341 1.627030 3.22371 -2.616790 -2.89281 2.25168 1.46636 6.23832
## PC465 PC466 PC467 PC468 PC469 PC470 PC471
## 1 -0.8595550 -8.121010 4.350920 -8.19652 -6.18350 4.31932 2.25756
## 2 -1.6582400 -0.759506 -1.826000 1.28070 4.85208 -14.27040 -3.75016
## 3 -3.1546500 3.949060 0.833705 8.84442 -11.63850 3.87160 8.93423
## 4 4.5676100 2.243230 2.695440 4.38762 -2.73565 10.22340 9.48760
## 5 6.4021700 7.530270 -4.207430 5.05809 3.74303 5.56313 -11.32940
## 6 0.0534804 -2.098820 0.879289 7.01229 -1.25283 1.59843 2.22859
## PC472 PC473 PC474 PC475 PC476 PC477 PC478
## 1 7.251250 -10.5307000 -13.16660 7.57463000 0.8817980 1.615600 3.56190
## 2 -6.001540 5.1884200 -1.68037 -0.00133996 -11.3358000 -0.496895 -5.49822
## 3 0.875364 -7.8378000 -5.09272 3.23684000 1.9439400 -1.600750 7.20952
## 4 -2.734310 1.1012500 1.62950 1.95918000 -0.0215181 -6.678140 11.79640
## 5 -10.640100 10.7421000 8.44604 5.37840000 -7.9373300 -5.275570 -6.24019
## 6 -4.977770 -0.0474631 2.18063 2.81963000 0.4887730 -1.164540 -1.01950
## PC479 PC480 PC481 PC482 PC483 PC484 PC485
## 1 -2.480880 -3.143040 4.275180 7.38254 -8.756690 -6.248620 12.62860
## 2 2.959950 7.075270 10.358600 -1.92343 9.125270 -3.130580 1.12998
## 3 0.737442 4.028600 -2.381780 -2.44927 2.394750 6.570870 1.18524
## 4 7.204280 -9.550180 -1.384020 2.98877 -7.534780 12.913600 2.40848
## 5 2.790070 1.301020 -9.736830 -9.14713 1.611010 1.028350 3.16043
## 6 -0.139831 0.870771 0.122987 2.90008 -0.932203 0.602586 -2.58757
## PC486 PC487 PC488 PC489 PC490 PC491 PC492
## 1 8.3506700 -4.273670 -2.285900 14.299500 -0.645002 1.525670 10.16080
## 2 -13.7747000 5.280480 -0.302640 -13.785400 0.930290 0.357322 1.94235
## 3 -2.2216700 -1.484910 -0.478184 -0.936645 -2.457360 1.994800 -2.99770
## 4 0.0860032 -2.037850 -1.847600 -6.067140 8.656050 3.329810 -7.81211
## 5 -1.7489000 10.983400 1.928100 0.709953 -6.431470 1.777930 8.80344
## 6 -1.6356200 0.587272 -1.084890 -2.117870 -2.745180 2.366730 1.15418
## PC493 PC494 PC495 PC496 PC497 PC498 PC499 PC500
## 1 7.37287 0.176354 4.677560 1.84540 -3.119430 -4.82422 -4.018130 -2.041050
## 2 -5.93754 1.039890 3.069200 3.23511 6.167540 -2.54592 -7.767320 2.111700
## 3 -0.45086 2.726000 8.473720 -11.14640 -9.234550 6.74055 -1.121020 -8.940670
## 4 -3.17611 -3.790700 0.453662 -9.07701 -8.398090 12.10130 0.889802 8.020890
## 5 -1.76968 -6.958450 -0.673026 2.90539 2.844770 -3.53862 -8.190720 7.723010
## 6 -1.16890 3.543110 2.554090 2.65964 -0.569137 -1.28228 2.266770 -0.276759
## PC501 PC502 PC503 PC504 PC505 PC506 PC507
## 1 -11.466600 4.61609 1.5441200 -2.42249 0.508865 -10.256700 -6.23454
## 2 3.524180 -1.77061 1.4710500 -2.67078 -2.123460 18.583800 2.30523
## 3 -5.712750 4.13302 1.1619900 -7.85439 -3.684590 -1.116720 4.81702
## 4 3.111240 4.54086 -4.0525000 2.62071 2.244780 -7.115510 2.93446
## 5 -1.523200 6.64615 -0.0821228 -2.74096 5.005300 -2.443910 2.80938
## 6 0.116642 -1.79876 2.2429900 1.43504 0.820537 -0.538336 2.06717
## PC508 PC509 PC510 PC511 PC512 PC513 PC514
## 1 -11.87630 -10.979200 6.691420 3.878870 -7.65737 -1.319220 6.557230
## 2 -3.44116 9.738840 -10.613500 -4.610280 13.61980 3.491740 0.255563
## 3 9.32026 8.795590 5.135210 -0.590614 5.46324 -0.372639 -1.104600
## 4 15.57590 -0.628516 9.957560 -14.486000 -1.74275 9.748290 -10.349800
## 5 -4.20695 1.157280 -5.326120 2.030270 -4.28111 2.153750 -7.172970
## 6 3.51556 2.089350 0.911302 0.985379 -1.40459 1.728870 4.395040
## PC515 PC516 PC517 PC518 PC519 PC520 PC521 PC522
## 1 -5.64497 4.889560 -6.77573 -7.80096 -6.49570 -2.299980 6.26671 -1.31589
## 2 11.18980 0.434659 -7.05622 7.41160 6.23250 0.715959 -12.45000 5.41268
## 3 5.78702 3.711290 7.03477 -2.30366 -2.89309 -5.057240 -3.58941 -4.90184
## 4 -7.00253 -3.985060 5.39715 3.04016 -8.84464 -6.509750 2.45062 1.76711
## 5 12.66740 -0.865975 6.40084 4.60174 -1.25770 8.229480 2.34765 -5.37326
## 6 1.73510 0.490490 2.10920 -2.62063 1.46998 -0.332803 1.30543 -1.07806
## PC523 PC524 PC525 PC526 PC527 PC528 PC529 PC530
## 1 2.945230 1.138730 -5.01213 -4.44029 10.943300 -1.93990 -5.86642 7.822330
## 2 -1.000660 0.874072 7.03955 6.47242 -9.324960 2.96740 5.53623 -17.398500
## 3 3.367260 -3.859840 11.02850 3.50521 1.829570 4.36137 5.49040 1.997790
## 4 -0.970277 2.748140 -9.13475 7.53063 -1.372760 -5.29689 7.10520 2.154380
## 5 -6.823530 -3.007080 7.92358 -9.34217 0.920635 9.77813 -3.83178 -2.551710
## 6 -3.014560 -1.846280 -2.01592 2.11683 1.404330 3.06232 1.12556 0.724992
## PC531 PC532 PC533 PC534 PC535 PC536 PC537
## 1 2.323650 5.801780 -10.26250 -4.90426 -2.992010 -0.971161 2.581790
## 2 6.846980 3.137600 3.26747 12.85440 5.447260 3.922390 -4.988790
## 3 -12.811800 -3.664390 7.13303 -7.04664 -3.836180 -5.814210 -0.255316
## 4 -3.514190 -4.665410 11.52220 -8.08442 -0.761186 -1.351220 -0.832300
## 5 5.560800 3.848090 4.30847 1.41211 2.398800 -2.827290 5.605320
## 6 -0.771688 -0.464441 -2.79526 -1.20121 -2.890720 1.716190 -0.618047
## PC538 PC539 PC540 PC541 PC542 PC543 PC544
## 1 -5.959560 -3.501270 -5.19679 0.794489 2.939730 0.300115 -0.372011
## 2 9.850100 1.798040 5.45823 2.245440 6.514940 10.342000 4.987210
## 3 -10.418700 1.877030 9.63294 -0.759066 -7.147520 -14.165300 1.656040
## 4 0.103646 4.092080 2.45996 -1.219830 5.036970 -1.929810 -0.558734
## 5 3.425080 0.461763 6.80273 -1.780450 -0.297549 -2.559580 4.184200
## 6 -2.669820 0.519892 -2.16603 0.222370 -0.555264 1.491080 1.899450
## PC545 PC546 PC547 PC548 PC549 PC550 PC551
## 1 8.596730 1.90680 -2.768640 3.892500 4.516370 3.753670 -1.818410
## 2 -3.373460 -6.47414 -8.070960 -4.283470 -1.319790 -3.101690 0.845312
## 3 -15.233900 5.33849 7.718630 4.104330 -3.104630 -6.484370 -1.206920
## 4 -4.923260 11.12820 0.777560 -12.153200 1.842120 -3.279480 7.573670
## 5 -0.249055 -2.89930 3.862600 -2.626490 0.115531 -0.304657 -3.409650
## 6 -1.699340 -1.89654 -0.990828 -0.248587 -0.366713 1.449200 -0.740623
## PC552 PC553 PC554 PC555 PC556 PC557 PC558
## 1 4.049500 -1.495450 -3.624230 -1.34729 -2.43588 1.804550 0.426419
## 2 -2.175330 7.641940 -1.313890 1.87972 2.78012 -3.094950 0.514300
## 3 5.954110 -14.187400 6.590610 -3.21690 1.15203 8.346890 -10.219600
## 4 -6.037380 0.958791 1.392710 5.49540 5.46041 -0.772404 8.587810
## 5 0.192326 -2.167610 -0.655948 -3.30742 -1.00708 -0.423641 -6.919280
## 6 -1.134870 1.823930 1.346370 1.15543 -1.46450 -0.383302 1.604070
## PC559 PC560 PC561 PC562 PC563 PC564 PC565
## 1 -0.888456 0.257742 1.038540 2.808230 -5.764230 -4.787640 4.876670
## 2 3.561430 -4.110920 -5.611150 -7.440650 10.514700 0.633140 -8.441390
## 3 -2.358930 -2.656010 0.185138 -1.445850 -14.351100 17.148300 11.326400
## 4 -3.561890 3.618090 -1.657060 1.604290 -1.112830 1.703900 -7.477850
## 5 0.117779 1.662100 1.995320 -6.466400 -3.115950 0.748186 7.706960
## 6 -2.316310 1.767560 0.239714 0.876397 -0.618647 1.905450 -0.306901
## PC566 PC567 PC568 PC569 PC570 PC571 PC572
## 1 -0.887465 -1.858450 -0.991821 -1.28782 -1.4095300 0.9321680 3.216610
## 2 4.588680 3.871030 1.648220 4.39023 -0.0919025 1.7332200 -5.565070
## 3 -2.986610 -8.071860 0.334433 -8.15559 5.5742400 0.0552569 0.404143
## 4 -2.704040 4.158040 4.521030 -2.94330 3.8994000 -1.2519600 1.594030
## 5 -0.855762 -1.864210 -3.602440 1.91566 -2.2370900 -0.7691340 -1.671210
## 6 2.520780 -0.519024 1.647360 -1.43841 2.0766900 -0.7250520 1.769540
## PC573 PC574 PC575 PC576 PC577 PC578 PC579
## 1 1.496480 -1.60765000 1.835570 -0.311554 1.6758400 1.44635 -2.656710
## 2 -1.825640 -3.48712000 -4.018310 -1.928760 3.8782400 -2.35807 -3.427110
## 3 1.656520 9.01008000 5.986710 8.663310 -1.4363500 2.52865 6.829940
## 4 0.271233 -0.82334800 1.392990 2.936590 -6.7774400 3.61576 1.093880
## 5 -0.623263 -0.00298675 -0.689481 -1.116620 1.4269300 -1.53743 5.411900
## 6 0.337914 -3.25942000 0.141403 1.424630 0.0278279 -3.64204 0.287007
## PC580 PC581 PC582 PC583 PC584 PC585 PC586 PC587
## 1 0.0364742 -0.673971 0.374328 -2.417630 -3.62892 -2.09375 -2.73483 -0.028330
## 2 2.7331800 -3.874590 1.700810 -0.382224 1.72387 4.34788 -2.56949 -1.864730
## 3 0.8755800 6.514110 -8.071340 2.443990 1.86322 -3.15398 11.21930 -2.462360
## 4 -1.3443800 2.392330 -1.364820 2.654820 2.92796 4.27132 2.94084 1.640400
## 5 2.0050500 -0.616753 2.817470 -1.002230 4.87625 -2.26725 4.54479 -0.719349
## 6 0.8808590 -0.149492 -0.638982 0.355468 1.05417 -1.38352 2.73011 -0.131356
## PC588 PC589 PC590 PC591 PC592 PC593 PC594
## 1 1.869680 1.046760 3.3797800 0.5383800 0.843932 1.5864500 -0.317305
## 2 3.769710 1.677300 -2.3516500 -3.0843900 1.391900 -1.9263600 0.749139
## 3 -11.209900 -7.578970 2.0437200 6.1070900 1.570600 -3.5635800 4.383560
## 4 -6.151220 -3.194720 -0.4319590 3.1596800 1.248980 -3.8401600 0.225465
## 5 -2.285420 -0.290165 0.3885680 0.0386186 1.339790 -0.5385550 4.759910
## 6 -0.215492 0.478535 -0.0664508 -0.0911585 -0.515249 0.0851132 0.918039
## PC595 PC596 PC597 PC598 PC599 PC600 PC601
## 1 -0.362545 -2.949700 -0.310311 1.668790 -2.523580 -1.5308200 -0.0377293
## 2 2.672630 1.273240 -1.482300 1.658700 2.208960 -0.5216440 -0.1209870
## 3 -2.354800 1.945270 5.952790 -4.633580 3.818910 3.0710100 0.7772900
## 4 -1.462290 2.769190 -1.792890 -2.236280 -2.578560 -0.5260140 -0.4195160
## 5 -2.123350 -2.003970 2.803530 0.577009 0.158948 2.5971500 0.0807232
## 6 -1.327320 -0.395127 1.411420 -0.992167 2.503670 -0.0913396 -0.1667980
## PC602 PC603 PC604 PC605 PC606 PC607 PC608
## 1 -0.304739 2.0450900 -1.732020 -0.111057 -2.124030 2.152210 -1.234130
## 2 1.805010 0.4760950 2.756400 2.929340 0.613717 -1.069050 -2.413670
## 3 -1.961670 -3.8785100 -0.403171 -2.773050 -0.821987 -2.087580 3.312900
## 4 -1.197300 -2.5238100 0.364061 0.456234 3.298650 -1.751270 0.102052
## 5 -1.098050 -0.0999618 1.809800 -0.971798 0.701285 0.195923 0.206270
## 6 1.285450 -0.1154760 0.909756 -1.508790 -0.719903 0.878298 -1.775600
## PC609 PC610 PC611 PC612 PC613 PC614 PC615
## 1 0.0955621 0.760985 -0.276028 -1.924340 -1.25493 1.147160 0.523616
## 2 2.2096100 1.552620 0.702700 1.197280 2.83933 -1.803140 0.294197
## 3 -1.7223200 -3.096790 1.704880 -1.673690 1.30840 -0.975726 -2.455110
## 4 0.4960720 -3.631510 -2.145910 0.706432 1.99772 1.129210 -1.991370
## 5 -1.3036800 -1.539820 1.114480 1.376400 -1.09089 -0.285640 1.223230
## 6 0.2844120 -1.458870 -0.345730 1.663580 1.16857 0.600104 0.706003
## PC616 PC617 PC618 PC619 PC620 PC621 PC622
## 1 1.8332200 1.562370 1.612280 -1.175330 0.0216826 1.006140 0.929279
## 2 -0.9053970 -2.024050 -1.980180 1.601660 -0.0189202 1.478750 -1.094460
## 3 -0.8383630 1.191360 1.355010 0.787760 0.5707440 0.313855 2.261460
## 4 -0.8475500 -0.421698 1.163030 0.909591 -0.8847680 1.234950 0.501141
## 5 0.0416817 0.737694 -0.367072 0.422090 1.4115900 2.007890 0.573467
## 6 0.8153970 0.423953 0.711886 -0.503651 -1.3974000 0.108223 1.066580
## PC623 PC624 PC625 PC626 PC627 PC628 PC629
## 1 -0.9999710 -0.228736 0.704697 1.544990 0.0104346 1.365800 -0.202931
## 2 0.8966440 0.772229 -1.372110 0.586026 1.3146800 0.465005 -1.468590
## 3 -0.0854745 1.276980 -1.224610 -0.936766 0.3474380 -0.498030 0.781507
## 4 -0.9593130 -1.268390 2.439610 -1.104860 -0.2633000 -1.856460 0.707273
## 5 -1.0007400 -0.563482 -0.245787 1.427570 -1.7325300 0.187427 1.367160
## 6 1.0558100 -2.575940 0.430193 0.311661 0.0586203 0.996500 0.153715
## PC630 PC631 PC632 PC633 PC634 PC635 PC636
## 1 0.992440 -0.08316820 0.1058400 -2.081650 0.859041 0.2855410 2.134560
## 2 -1.141770 0.97480700 0.8667930 2.065520 0.871858 0.8486220 -0.593652
## 3 0.709320 -0.00153648 -0.9135030 -0.356353 -1.326770 -0.6266120 -1.961070
## 4 0.532257 -0.17556200 0.3786760 0.878349 1.323040 -0.2336970 -0.631268
## 5 -0.776238 -0.79250300 -0.0519582 1.281300 -2.137560 0.0859874 0.104304
## 6 0.369187 1.09454000 0.3900930 -0.711197 -1.871700 2.4187500 0.847491
## PC637 PC638 PC639 PC640 PC641 PC642 PC643
## 1 -0.4754800 0.119019 1.084280 -1.0022400 -0.134867 0.237516 0.913197
## 2 -0.0387455 0.773562 0.075502 -0.0604612 1.178640 -1.056790 -0.876658
## 3 -0.5904150 -1.381190 -0.123490 -0.4848100 -0.799971 -0.118841 -0.716481
## 4 0.4127430 0.214456 -0.462740 -0.5274100 -2.357560 -2.198860 -0.220158
## 5 -0.3526710 0.757678 -0.148562 0.3279850 0.465572 0.841716 0.112466
## 6 -1.1454800 0.593687 2.358600 -1.3738800 -1.556790 -0.719843 2.627710
## PC644 PC645 PC646 PC647 PC648 PC649 PC650
## 1 -0.0298159 -0.0301118 0.7362140 0.423934 -0.7878370 -0.0601018 -0.1994670
## 2 -0.4515030 0.4289420 1.2191200 1.215450 0.4249400 0.1166390 -0.3358370
## 3 0.3272280 -0.2396700 -0.0966545 -0.200439 -0.0777307 -0.3134910 -0.3891740
## 4 -1.0070500 0.4317410 0.3506900 1.975630 0.0405942 -1.4282100 0.0747299
## 5 -1.0477900 0.0991487 0.2981610 -0.493125 -0.2079020 -0.2323220 -1.1393900
## 6 1.1811300 -0.1388410 1.5783100 -1.528600 -0.8320670 0.0684478 -1.5233300
## PC651 PC652 PC653 PC654 PC655 PC656 PC657
## 1 0.2841670 -0.345722 -0.3721110 -0.176386 0.353736 0.633902 0.486523
## 2 0.4595890 1.527450 1.3729600 0.588080 0.904454 0.113773 0.466722
## 3 0.8857030 0.627657 0.7609450 0.841837 0.349403 -0.138947 -0.989558
## 4 -0.1274200 0.738954 -0.1443750 -0.387653 -0.690265 -0.824392 -0.577596
## 5 0.0882755 -0.301358 0.0866904 0.863115 -0.581216 -1.302260 0.402338
## 6 -2.5241100 0.432479 -2.9113700 0.597232 1.346460 0.132135 3.120740
## PC658 PC659 PC660 PC661 PC662 PC663 PC664
## 1 -1.412490 0.3808940 -0.5036700 -0.749166 -0.672211 0.1084770 -0.2851890
## 2 0.380635 -0.4648890 -0.2210650 -0.442385 -0.733839 0.4720470 0.6990380
## 3 0.868649 0.2134630 -0.6473910 0.524091 1.228340 -0.5587860 -0.0353689
## 4 0.761675 0.4236070 -0.1206910 -0.388974 -0.635118 0.0567572 -0.4692930
## 5 1.154090 0.0332251 0.0640611 -0.192103 0.126554 -0.5142230 -0.3111330
## 6 -2.914530 5.1726300 2.5903900 6.830200 -2.036000 4.4711400 -0.3952730
## PC665 PC666 PC667 PC668 PC669 PC670
## 1 -0.00682654 -0.1172970 0.165462 -0.3085690 -0.977771 -0.1157990
## 2 -1.15614000 -0.5562380 0.646590 0.1819240 2.369690 -0.2985400
## 3 0.03809490 0.4095490 0.356780 -0.0517369 1.355550 -0.0613148
## 4 0.49917600 0.2976380 -0.696421 -0.1653140 -1.004470 -0.4682080
## 5 -0.64443300 -0.0728556 0.475144 -0.4606660 0.677836 -0.2047800
## 6 8.66924000 2.1869300 -18.597900 -0.7709110 -38.480100 12.5468000
## PC671 PC672 PC673 PC674 PC675 PC676 PC677
## 1 0.1865840 -0.2750550 0.306220 -0.221516 -0.102784 -0.2207100 -0.1840940
## 2 0.5650000 -0.3958730 0.135673 -0.266523 -0.204872 0.0231147 -0.4386240
## 3 0.4147100 -0.0288715 -0.388242 0.111694 -1.274040 1.3153700 0.7371570
## 4 0.0534088 0.6366900 0.194803 1.190450 -0.457648 0.2328710 -0.0647057
## 5 0.1784740 -0.2428200 -0.389587 -1.159910 0.863640 0.1489970 -0.5326790
## 6 -11.7122000 2.9120900 -2.295190 1.574010 2.193640 1.2128000 -3.1024400
## PC678 PC679 PC680 PC681 PC682 PC683 PC684
## 1 0.0640422 -0.2710100 -0.000236821 0.0546136 -0.1848390 0.301509 -0.108393
## 2 -0.3287060 0.0539233 0.394181000 0.0925843 -0.1991700 0.184817 0.223752
## 3 -0.7678080 0.0527573 0.059858600 0.5556500 -0.1576540 -0.341618 -0.128898
## 4 0.7049970 0.1382530 0.299252000 0.2478450 0.0147967 -0.313619 0.155991
## 5 0.3404060 -0.1041340 -0.729199000 0.3316830 0.4132190 -0.332750 -0.111544
## 6 11.3435000 -0.6586940 1.011680000 0.6987820 0.5028950 -0.502816 -0.538584
## PC685 PC686 PC687 PC688 Individual region
## 1 0.559724 -0.0521266 -0.2548120 1.60154e-06 801 Southern Europe
## 2 -0.242465 0.3617630 -0.0776254 1.60154e-06 802 Southern Europe
## 3 -0.321339 -0.2971330 0.3965070 1.60154e-06 803 Southern Europe
## 4 -0.165811 0.0432563 -0.0400723 1.60154e-06 804 Southern Europe
## 5 0.147862 0.0761560 0.0152784 1.60154e-06 805 Southern Europe
## 6 -0.490679 -0.5041310 0.0754175 1.60154e-06 806 Southern Europe
## Pop_City Country Latitude Longitude Continent Year Region
## 1 Durres Albania 41.29704 19.50373 Europe 2018 Southern Europe
## 2 Durres Albania 41.29704 19.50373 Europe 2018 Southern Europe
## 3 Durres Albania 41.29704 19.50373 Europe 2018 Southern Europe
## 4 Durres Albania 41.29704 19.50373 Europe 2018 Southern Europe
## 5 Durres Albania 41.29704 19.50373 Europe 2018 Southern Europe
## 6 Durres Albania 41.29704 19.50373 Europe 2018 Southern Europe
## Subregion order
## 1 East Europe 25
## 2 East Europe 25
## 3 East Europe 25
## 4 East Europe 25
## 5 East Europe 25
## 6 East Europe 25
PCS 1 & 2
#save the pca plot
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc2_r1.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 and PC3
(r2<0.1)
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:30,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
If need to combine projects: https://www.rdocumentation.org/packages/LEA/versions/1.4.0/topics/snmf combine.snmfProject(file.snmfProject,toCombine.snmfProject) combine.snmfProject(r2_0.1.snmfProject,new_r2_0.1.snmfProject)
or use project=“continue” instead of “new” when adding remaining repetitions chrome-extension://efaidnbmnnnibpcajpcglclefindmkaj/https://www.bioconductor.org/packages/devel/bioc/manuals/LEA/man/LEA.pdf
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","lea_cross_entropy_euro_global_r1.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best for K=20
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 4 4 4
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 4 4 4 4 4 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 4 4 4 4 4 4 4 4
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 4 4 4 4 4
## without cross-entropy 0 0 0 0 0
## total 4 4 4 4 4
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max 0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max 0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max 0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max 0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
## K = 29 K = 30
## min 0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max 0.8047952 0.8044480
# get the cross-entropy of all runs for K = 20
ce = cross.entropy(project, K = 20)
ce #run is best for k=20
## K = 20
## run 1 0.8046743
## run 2 0.8048026
## run 3 0.8048396
## run 4 0.8049788
color_palette_20 <-
c(
"#FF8C1A",
"yellow2",
"#77DD77",
"#1E90FF",
"#B22222",
"#AE8333",
"#B20CC9",
"#F49AC2",
"blue",
"#008080",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"green",
"purple",
"orangered"
)
Default plot for K=20
# select the best run for chosen K
best = which.min(cross.entropy(project, K = 20))
# best is run 1
barchart(project, K = 20, run = best,
border = NA, space = 0,
col = color_palette_20,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
#pops_inds<-read.delim("/vast/palmer/scratch/caccone/mkc54/microsats/pops_inds.txt")
#pops <- as.factor($pops)
#inds <- as.factor(pops_inds$inds)
using ggplot
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 20, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_20)
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k20_r1.pdf"
),
width = 10,
height = 7,
units = "in"
)
Using ggplot2 for individual admixtures
change to correct matrix
leak20 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K20/run1/r2_0.1_r1.20.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak20)
## # A tibble: 6 × 20
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.467 5.65e-3 2.91e-2 2.08e-2 2.06e-2 2.30e-2 7.48e-2 7.89e-3 5.27e-2 3.98e-2
## 2 0.311 2.01e-2 8.34e-2 1.82e-2 2.75e-2 2.10e-2 1.22e-1 1.00e-4 4.56e-2 1.27e-2
## 3 0.990 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 0.845 9.99e-5 3.00e-4 9.99e-5 6.99e-3 9.99e-5 2.16e-2 6.00e-2 9.99e-5 1.42e-2
## 5 0.878 9.99e-5 9.99e-5 9.99e-5 1.74e-2 4.22e-3 1.23e-2 5.27e-2 2.50e-3 1.17e-2
## 6 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 10 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.466583 5.64551e-03 2.91126e-02 2.07730e-02 2.06457e-02 2.29631e-02
## 2 1002 OKI 0.310824 2.01057e-02 8.33858e-02 1.82188e-02 2.74903e-02 2.10350e-02
## 3 1003 OKI 0.989852 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1004 OKI 0.844984 9.99280e-05 3.00413e-04 9.99280e-05 6.98909e-03 9.99280e-05
## 5 1005 OKI 0.878223 9.99262e-05 9.99262e-05 9.99262e-05 1.73929e-02 4.21508e-03
## 6 1006 OKI 0.998103 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## X7 X8 X9 X10 X11 X12
## 1 7.47906e-02 7.89411e-03 5.27420e-02 3.98423e-02 8.68477e-03 1.01131e-02
## 2 1.22019e-01 9.99730e-05 4.56474e-02 1.26797e-02 2.23758e-02 9.99730e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 2.15622e-02 6.00394e-02 9.99280e-05 1.41907e-02 3.47921e-03 1.23783e-02
## 5 1.22851e-02 5.26514e-02 2.50057e-03 1.16512e-02 4.90798e-03 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## X13 X14 X15 X16 X17 X18
## 1 9.37350e-02 4.03754e-02 1.32796e-02 2.91868e-02 1.85944e-03 5.56498e-02
## 2 1.60111e-01 5.60752e-02 2.10341e-02 8.74184e-03 1.79302e-02 4.49698e-02
## 3 9.98380e-05 9.98380e-05 8.35050e-03 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.11324e-02 1.59733e-03 8.88493e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 5 4.65299e-03 9.99262e-05 9.99262e-05 9.99262e-05 5.12692e-04 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## X19 X20
## 1 1.78205e-03 4.34181e-03
## 2 7.05643e-03 9.99730e-05
## 3 9.98380e-05 9.98380e-05
## 4 1.36630e-02 9.99280e-05
## 5 1.01074e-02 9.99262e-05
## 6 9.98289e-05 9.98289e-05
Rename the columns
# Rename the columns starting from the third one
leak20 <- leak20 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak20)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.466583 5.64551e-03 2.91126e-02 2.07730e-02 2.06457e-02 2.29631e-02
## 2 1002 OKI 0.310824 2.01057e-02 8.33858e-02 1.82188e-02 2.74903e-02 2.10350e-02
## 3 1003 OKI 0.989852 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1004 OKI 0.844984 9.99280e-05 3.00413e-04 9.99280e-05 6.98909e-03 9.99280e-05
## 5 1005 OKI 0.878223 9.99262e-05 9.99262e-05 9.99262e-05 1.73929e-02 4.21508e-03
## 6 1006 OKI 0.998103 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## v7 v8 v9 v10 v11 v12
## 1 7.47906e-02 7.89411e-03 5.27420e-02 3.98423e-02 8.68477e-03 1.01131e-02
## 2 1.22019e-01 9.99730e-05 4.56474e-02 1.26797e-02 2.23758e-02 9.99730e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 2.15622e-02 6.00394e-02 9.99280e-05 1.41907e-02 3.47921e-03 1.23783e-02
## 5 1.22851e-02 5.26514e-02 2.50057e-03 1.16512e-02 4.90798e-03 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## v13 v14 v15 v16 v17 v18
## 1 9.37350e-02 4.03754e-02 1.32796e-02 2.91868e-02 1.85944e-03 5.56498e-02
## 2 1.60111e-01 5.60752e-02 2.10341e-02 8.74184e-03 1.79302e-02 4.49698e-02
## 3 9.98380e-05 9.98380e-05 8.35050e-03 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.11324e-02 1.59733e-03 8.88493e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 5 4.65299e-03 9.99262e-05 9.99262e-05 9.99262e-05 5.12692e-04 9.99262e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## v19 v20
## 1 1.78205e-03 4.34181e-03
## 2 7.05643e-03 9.99730e-05
## 3 9.98380e-05 9.98380e-05
## 4 1.36630e-02 9.99280e-05
## 5 1.01074e-02 9.99262e-05
## 6 9.98289e-05 9.98289e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak20 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_20 <-
c(
"#FF8C1A",
"#B20CC9",
"#77DD77",
"#1E90FF",
"#B22222",
"green",
"yellow2",
"#F49AC2",
"blue",
"#008080",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"#AE8333",
"purple",
"orangered"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=20.\n LEA inference for k20 with 56,384 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_20) +
expand_limits(y = c(0, 1.5))
Try plotting k=5 (See if the 5 original Asian clusters hold)
Summary of project
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 4 4 4
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 4 4 4 4 4 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 4 4 4 4 4 4 4 4
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 4 4 4 4 4
## without cross-entropy 0 0 0 0 0
## total 4 4 4 4 4
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max 0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max 0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max 0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max 0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
## K = 29 K = 30
## min 0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max 0.8047952 0.8044480
# get the cross-entropy of all runs for K = 5
ce = cross.entropy(project, K = 5)
ce #run 1 is best for k=5
## K = 5
## run 1 0.8202643
## run 2 0.8228964
## run 3 0.8206233
## run 4 0.8212199
## run 5 0.8221128
choose best run here
leak5 <- read_delim(
here("euro_global/output/snps_sets/r2_0.1.snmf/K5/run1/r2_0.1_r1.5.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak5)
## # A tibble: 6 × 5
## X1 X2 X3 X4 X5
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0100 0.432 0.201 0.137 0.220
## 2 0.0200 0.435 0.204 0.121 0.220
## 3 0.0157 0.464 0.175 0.147 0.198
## 4 0.000100 0.452 0.226 0.0883 0.233
## 5 0.000100 0.462 0.199 0.109 0.230
## 6 0.0234 0.457 0.192 0.116 0.211
The fam file
fam_file <- here(
"euro_global/output/snps_sets/r2_0.1.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 0.01004940 0.432031 0.201044 0.1370630 0.219812
## 2 1002 OKI 0.01995590 0.434927 0.203757 0.1211740 0.220186
## 3 1003 OKI 0.01567250 0.464427 0.174829 0.1474360 0.197636
## 4 1004 OKI 0.00009999 0.452051 0.226127 0.0883153 0.233406
## 5 1005 OKI 0.00009999 0.462282 0.199097 0.1087080 0.229812
## 6 1006 OKI 0.02340520 0.457148 0.191711 0.1164300 0.211305
Rename the columns
# Rename the columns starting from the third one
leak5 <- leak5 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak5)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 0.01004940 0.432031 0.201044 0.1370630 0.219812
## 2 1002 OKI 0.01995590 0.434927 0.203757 0.1211740 0.220186
## 3 1003 OKI 0.01567250 0.464427 0.174829 0.1474360 0.197636
## 4 1004 OKI 0.00009999 0.452051 0.226127 0.0883153 0.233406
## 5 1005 OKI 0.00009999 0.462282 0.199097 0.1087080 0.229812
## 6 1006 OKI 0.02340520 0.457148 0.191711 0.1164300 0.211305
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Default plot
color_palette_5 <-
c(
"#FF8C1A",
"#77DD37",
"#1E90FF",
"purple3",
"#FFFF19"
)
# select the best run for K = 15 clusters
best = which.min(cross.entropy(project, K = 5))
# best is run 4
barchart(project, K = 5, run = best,
border = NA, space = 0,
col = color_palette_5,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
color_palette_5 <-
c(
"#FF8C1A",
"#77DD37",
"#1E90FF",
"purple3",
"#FFFF19"
)
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 5, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:5)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_5[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=5.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_5)
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak5 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_5 <-
c(
"#FF8C1A",
"#77DD37",
"#1E90FF",
"purple3",
"#FFFF19"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:5)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_5[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 10
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k5 with 56,384 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_5) +
expand_limits(y = c(0, 1.5))
for SNP Set 2 (r2<0.1)
Summary of project check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 4 4 4
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 4 4 4 4 4 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 4 4 4 4 4 4 4 4
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 4 4 4 4 4
## without cross-entropy 0 0 0 0 0
## total 4 4 4 4 4
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max 0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max 0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max 0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max 0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
## K = 29 K = 30
## min 0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max 0.8047952 0.8044480
# get the cross-entropy of all runs for K = 16
ce = cross.entropy(project, K = 16)
ce #run 1 is best for k=
## K = 16
## run 1 0.8059477
## run 2 0.8064777
## run 3 0.8061023
## run 4 0.8071925
color_palette_16 <-
c(
"#FF8C1A",
"#B20CC9",
"#77DD77",
"#1E90FF",
"#B22222",
"green",
"yellow2",
"#F49AC2",
"blue",
"#FFFF99",
"#75FAFF",
"magenta",
"green4",
"navy",
"purple",
"orangered"
)
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 16, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_16[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=16.") +
#scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_16)
Extract ancestry coefficients for k=16 change to correct matrix
leak16 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K16/run1/r2_0.1_r1.16.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak16)
## # A tibble: 6 × 16
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2.37e-2 8.74e-2 4.86e-2 9.70e-3 2.66e-2 0.488 1.74e-2 1.10e-2 6.44e-2 6.09e-3
## 2 2.38e-2 1.64e-1 2.72e-2 2.06e-2 9.94e-2 0.349 1.34e-2 5.74e-2 5.37e-2 1.28e-2
## 3 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.994 9.99e-5 3.45e-3 9.99e-5 9.99e-5
## 4 1.56e-2 1.97e-2 9.99e-5 5.05e-2 9.99e-5 0.864 9.99e-5 9.99e-5 4.52e-3 9.99e-5
## 5 1.01e-2 9.05e-3 3.70e-3 4.65e-2 9.99e-5 0.895 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 6 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 6 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 2.36683e-02 8.73786e-02 4.85907e-02 9.70212e-03 2.66047e-02 0.487992
## 2 1002 OKI 2.37829e-02 1.63880e-01 2.72121e-02 2.05985e-02 9.93526e-02 0.348696
## 3 1003 OKI 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 0.993884
## 4 1004 OKI 1.55944e-02 1.97235e-02 9.99280e-05 5.05453e-02 9.99280e-05 0.864136
## 5 1005 OKI 1.00754e-02 9.05345e-03 3.70392e-03 4.64856e-02 9.99280e-05 0.894758
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 0.998502
## X7 X8 X9 X10 X11 X12
## 1 1.74042e-02 1.09914e-02 6.43721e-02 6.08818e-03 7.88133e-02 5.26037e-02
## 2 1.34422e-02 5.73794e-02 5.37438e-02 1.27509e-02 6.08909e-02 8.98258e-02
## 3 9.98830e-05 3.45188e-03 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05
## 4 9.99280e-05 9.99280e-05 4.52242e-03 9.99280e-05 9.81981e-03 1.59261e-02
## 5 9.99280e-05 9.99280e-05 9.99280e-05 9.99280e-05 2.06442e-02 9.99280e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
## X13 X14 X15 X16
## 1 9.90955e-03 4.10494e-03 5.79049e-02 1.38716e-02
## 2 3.08931e-03 1.28189e-02 9.42796e-03 3.10916e-03
## 3 9.98830e-05 1.36576e-03 9.98830e-05 9.98830e-05
## 4 1.89333e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1.08838e-02 9.99280e-05 9.99280e-05 3.59637e-03
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
Rename the columns
# Rename the columns starting from the third one
leak16 <- leak16 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak16)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 2.36683e-02 8.73786e-02 4.85907e-02 9.70212e-03 2.66047e-02 0.487992
## 2 1002 OKI 2.37829e-02 1.63880e-01 2.72121e-02 2.05985e-02 9.93526e-02 0.348696
## 3 1003 OKI 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05 0.993884
## 4 1004 OKI 1.55944e-02 1.97235e-02 9.99280e-05 5.05453e-02 9.99280e-05 0.864136
## 5 1005 OKI 1.00754e-02 9.05345e-03 3.70392e-03 4.64856e-02 9.99280e-05 0.894758
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 0.998502
## v7 v8 v9 v10 v11 v12
## 1 1.74042e-02 1.09914e-02 6.43721e-02 6.08818e-03 7.88133e-02 5.26037e-02
## 2 1.34422e-02 5.73794e-02 5.37438e-02 1.27509e-02 6.08909e-02 8.98258e-02
## 3 9.98830e-05 3.45188e-03 9.98830e-05 9.98830e-05 9.98830e-05 9.98830e-05
## 4 9.99280e-05 9.99280e-05 4.52242e-03 9.99280e-05 9.81981e-03 1.59261e-02
## 5 9.99280e-05 9.99280e-05 9.99280e-05 9.99280e-05 2.06442e-02 9.99280e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
## v13 v14 v15 v16
## 1 9.90955e-03 4.10494e-03 5.79049e-02 1.38716e-02
## 2 3.08931e-03 1.28189e-02 9.42796e-03 3.10916e-03
## 3 9.98830e-05 1.36576e-03 9.98830e-05 9.98830e-05
## 4 1.89333e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1.08838e-02 9.99280e-05 9.99280e-05 3.59637e-03
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak16 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_16 <-
c(
"#FF8C1A",
"#B20CC9",
"#F49AC2",
"#1E90FF",
"#B22222",
"green",
"yellow2",
"#77DD77",
"blue",
"#FFFF99",
"#75FAFF",
"magenta",
"green4",
"navy",
"purple",
"orangered"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:16)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_16[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=16.\n LEA inference for k16 with 56,384 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_16) +
expand_limits(y = c(0, 1.5))
for LEA SNP Set 2 (r2<0.1)
Extract ancestry coefficients for k=22
color_palette_22 <-
c(
"#FF8C1A",
"yellow2",
"#77DD77",
"#1E90FF",
"#B22222",
"chocolate4",
"#B20CC9",
"#F49AC2",
"blue",
"#008080",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"green",
"purple",
"orangered2",
"goldenrod3",
"coral")
Default plot
# select the best run for chosen K
best = which.min(cross.entropy(project, K = 22))
# best is run 1
barchart(project, K = 22, run = best,
border = NA, space = 0,
col = color_palette_22,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 22, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_22[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=22.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_20)
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "LEA_admixture_by_country_euro_global_k22_r1.pdf"
),
width = 10,
height = 7,
units = "in"
)
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 4 4 4
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 4 4 4 4 4 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 4 4 4 4 4 4 4 4
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 4 4 4 4 4
## without cross-entropy 0 0 0 0 0
## total 4 4 4 4 4
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max 0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max 0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max 0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max 0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
## K = 29 K = 30
## min 0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max 0.8047952 0.8044480
# get the cross-entropy of all runs for K = 16
ce = cross.entropy(project, K = 22)
ce #run 3 is best for k=22
## K = 22
## run 1 0.8042983
## run 2 0.8043288
## run 3 0.8041131
## run 4 0.8050793
Extract ancestry coefficients for k=22 change to correct matrix
leak22 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K22/run3/r2_0.1_r3.22.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak22)
## # A tibble: 6 × 22
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.00433 0.0240 0.0000999 1.38e-3 2.26e-3 1.12e-2 5.04e-2 1.53e-2 6.20e-2
## 2 0.000100 0.0305 0.0136 5.11e-3 2.66e-2 1.58e-3 4.62e-2 1.00e-2 4.42e-2
## 3 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 1.41e-4 9.98e-5
## 4 0.0000999 0.0000999 0.000972 9.99e-5 9.99e-5 4.85e-2 9.99e-5 9.99e-5 9.99e-5
## 5 0.0000999 0.00648 0.0000999 9.99e-5 1.58e-2 3.37e-2 9.99e-5 9.99e-5 9.99e-5
## 6 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 13 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>,
## # X21 <dbl>, X22 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 4.32608e-03 2.39781e-02 9.99460e-05 1.37750e-03 2.25550e-03
## 2 1002 OKI 9.99820e-05 3.04932e-02 1.35758e-02 5.11275e-03 2.65567e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.72357e-04 9.99010e-05 9.99010e-05
## 5 1005 OKI 9.98740e-05 6.48490e-03 9.98740e-05 9.98740e-05 1.58013e-02
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## X6 X7 X8 X9 X10 X11
## 1 1.11820e-02 5.03599e-02 1.52698e-02 6.19554e-02 1.79845e-02 8.70910e-02
## 2 1.58466e-03 4.61989e-02 9.99813e-03 4.42420e-02 2.45108e-02 2.75577e-02
## 3 9.98201e-05 9.98201e-05 1.41240e-04 9.98201e-05 9.98201e-05 9.98201e-05
## 4 4.85039e-02 9.99010e-05 9.99010e-05 9.99010e-05 1.15443e-02 9.99010e-05
## 5 3.36562e-02 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 6.80740e-03
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## X12 X13 X14 X15 X16 X17
## 1 6.32903e-02 9.99460e-05 9.99460e-05 9.99460e-05 9.99460e-05 4.03509e-02
## 2 7.93744e-02 3.87164e-03 2.38563e-03 9.99820e-05 8.19030e-02 2.66488e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 8.45930e-03 9.99010e-05 2.28579e-03 2.34025e-02 4.88334e-03 9.99010e-05
## 5 9.98740e-05 9.98740e-05 9.98740e-05 3.36100e-02 9.98740e-05 9.98740e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## X18 X19 X20 X21 X22
## 1 1.75520e-02 7.57056e-02 0.484006 4.27157e-02 9.99460e-05
## 2 3.89836e-02 1.49530e-01 0.333537 5.07033e-02 3.03177e-03
## 3 9.98201e-05 9.98201e-05 0.997862 9.98201e-05 9.98201e-05
## 4 9.99010e-05 1.85123e-02 0.840190 2.36356e-02 1.65116e-02
## 5 9.98740e-05 2.18772e-02 0.872565 7.79989e-03 9.98740e-05
## 6 9.98108e-05 9.98108e-05 0.997904 9.98108e-05 9.98108e-05
Rename the columns
# Rename the columns starting from the third one
leak22 <- leak22 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak22)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 4.32608e-03 2.39781e-02 9.99460e-05 1.37750e-03 2.25550e-03
## 2 1002 OKI 9.99820e-05 3.04932e-02 1.35758e-02 5.11275e-03 2.65567e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.72357e-04 9.99010e-05 9.99010e-05
## 5 1005 OKI 9.98740e-05 6.48490e-03 9.98740e-05 9.98740e-05 1.58013e-02
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## v6 v7 v8 v9 v10 v11
## 1 1.11820e-02 5.03599e-02 1.52698e-02 6.19554e-02 1.79845e-02 8.70910e-02
## 2 1.58466e-03 4.61989e-02 9.99813e-03 4.42420e-02 2.45108e-02 2.75577e-02
## 3 9.98201e-05 9.98201e-05 1.41240e-04 9.98201e-05 9.98201e-05 9.98201e-05
## 4 4.85039e-02 9.99010e-05 9.99010e-05 9.99010e-05 1.15443e-02 9.99010e-05
## 5 3.36562e-02 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 6.80740e-03
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## v12 v13 v14 v15 v16 v17
## 1 6.32903e-02 9.99460e-05 9.99460e-05 9.99460e-05 9.99460e-05 4.03509e-02
## 2 7.93744e-02 3.87164e-03 2.38563e-03 9.99820e-05 8.19030e-02 2.66488e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 8.45930e-03 9.99010e-05 2.28579e-03 2.34025e-02 4.88334e-03 9.99010e-05
## 5 9.98740e-05 9.98740e-05 9.98740e-05 3.36100e-02 9.98740e-05 9.98740e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## v18 v19 v20 v21 v22
## 1 1.75520e-02 7.57056e-02 0.484006 4.27157e-02 9.99460e-05
## 2 3.89836e-02 1.49530e-01 0.333537 5.07033e-02 3.03177e-03
## 3 9.98201e-05 9.98201e-05 0.997862 9.98201e-05 9.98201e-05
## 4 9.99010e-05 1.85123e-02 0.840190 2.36356e-02 1.65116e-02
## 5 9.98740e-05 2.18772e-02 0.872565 7.79989e-03 9.98740e-05
## 6 9.98108e-05 9.98108e-05 0.997904 9.98108e-05 9.98108e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak22 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_22 <-
c(
"#FF8C1A",
"yellow2",
"#77DD77",
"chocolate4",
"#B22222",
"purple",
"#B20CC9",
"#F49AC2",
"blue",
"#1E90FF",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"green",
"#008080",
"goldenrod3",
"coral",
"orangered2"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:22)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_22[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=22.\n LEA inference for k22 with 56,384 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_22) +
expand_limits(y = c(0, 1.5))
LEA SNP Set 2 (r2<0.1)
Extract ancestry coefficients for k=23 LEA r2<0.1 change to correct matrix
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 4 4 4
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 4 4 4 4 4 4 4 4
## without cross-entropy 0 0 0 0 0 0 0 0
## total 4 4 4 4 4 4 4 4
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 4 4 4 4 4
## without cross-entropy 0 0 0 0 0
## total 4 4 4 4 4
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.8608505 0.8402761 0.8296083 0.8234885 0.8202643 0.8181505 0.8164076
## mean 0.8613624 0.8407854 0.8301619 0.8239896 0.8214234 0.8187226 0.8171335
## max 0.8618443 0.8412847 0.8306325 0.8244414 0.8228964 0.8191034 0.8176953
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8140960 0.8126303 0.8109797 0.8099917 0.8081587 0.8072942 0.8072585
## mean 0.8150810 0.8132653 0.8117269 0.8103243 0.8091718 0.8081472 0.8076578
## max 0.8153903 0.8141409 0.8124759 0.8106736 0.8107374 0.8090892 0.8081356
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8064940 0.8059477 0.8055170 0.8051042 0.8045488 0.8046743 0.8039141
## mean 0.8070258 0.8064300 0.8060372 0.8054720 0.8048453 0.8048238 0.8045790
## max 0.8074999 0.8071925 0.8064792 0.8058825 0.8050728 0.8049788 0.8048851
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.8041131 0.8041347 0.8037093 0.8033090 0.8036510 0.8033662 0.8035527
## mean 0.8044549 0.8046738 0.8040990 0.8039812 0.8042326 0.8039872 0.8041484
## max 0.8050793 0.8050589 0.8043927 0.8044319 0.8049499 0.8048407 0.8046351
## K = 29 K = 30
## min 0.8039001 0.8038556
## mean 0.8044645 0.8041767
## max 0.8047952 0.8044480
# get the cross-entropy of all runs for K = 23
ce = cross.entropy(project, K = 23)
ce #run 1 is best for k=23
## K = 23
## run 1 0.8041347
## run 2 0.8046587
## run 3 0.8050589
## run 4 0.8048428
leak23 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.1.snmf/K23/run1/r2_0.1_r1.23.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak23)
## # A tibble: 6 × 23
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0128 0.0167 0.00201 1.00e-4 4.82e-2 2.09e-2 4.73e-2 1.46e-2 7.80e-2
## 2 0.0206 0.0877 0.00711 1.00e-4 1.39e-2 4.46e-2 3.96e-2 4.61e-2 4.25e-2
## 3 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 0.00999 0.0000999 0.0000999 1.58e-2 4.36e-2 4.53e-2 9.99e-5 9.99e-5 9.99e-5
## 5 0.0119 0.0000999 0.0000999 9.99e-5 3.63e-2 4.11e-2 1.64e-2 9.99e-5 9.99e-5
## 6 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 14 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>,
## # X21 <dbl>, X22 <dbl>, X23 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.1.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 1.27639e-02 1.67423e-02 2.00565e-03 9.99550e-05 4.81760e-02
## 2 1002 OKI 2.05636e-02 8.77317e-02 7.10981e-03 9.99820e-05 1.38512e-02
## 3 1003 OKI 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 1004 OKI 9.99118e-03 9.98830e-05 9.98830e-05 1.58279e-02 4.36386e-02
## 5 1005 OKI 1.18692e-02 9.98740e-05 9.98740e-05 9.98740e-05 3.62593e-02
## 6 1006 OKI 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
## X6 X7 X8 X9 X10 X11
## 1 2.08513e-02 4.72904e-02 1.46362e-02 7.79676e-02 1.24348e-02 1.20796e-02
## 2 4.45759e-02 3.96060e-02 4.60880e-02 4.24541e-02 1.14773e-02 9.93311e-03
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 4.52657e-02 9.98830e-05 9.98830e-05 9.98830e-05 1.31127e-02 4.39800e-04
## 5 4.11295e-02 1.64199e-02 9.98740e-05 9.98740e-05 1.08600e-02 9.98740e-05
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
## X12 X13 X14 X15 X16 X17
## 1 9.99550e-05 0.468524 4.30887e-02 9.99550e-05 6.54457e-02 4.49180e-02
## 2 7.31831e-04 0.316644 3.43821e-02 1.79289e-02 9.94290e-02 1.10625e-02
## 3 9.98021e-05 0.997804 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 0.824120 9.98830e-05 9.98830e-05 3.81714e-02 9.98830e-05
## 5 4.26943e-03 0.862749 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 6 9.98019e-05 0.997804 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
## X18 X19 X20 X21 X22 X23
## 1 9.99550e-05 2.37160e-03 9.99550e-05 1.91372e-02 1.77211e-02 7.33465e-02
## 2 2.15107e-02 1.24573e-02 6.39485e-03 9.99820e-05 1.84864e-02 1.37382e-01
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 9.98830e-05 1.60094e-03 9.98830e-05 9.98830e-05 6.53375e-03
## 5 9.98740e-05 9.98740e-05 3.72111e-03 9.98740e-05 9.98740e-05 1.13240e-02
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
Rename the columns
# Rename the columns starting from the third one
leak23 <- leak23 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak23)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 1.27639e-02 1.67423e-02 2.00565e-03 9.99550e-05 4.81760e-02
## 2 1002 OKI 2.05636e-02 8.77317e-02 7.10981e-03 9.99820e-05 1.38512e-02
## 3 1003 OKI 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 1004 OKI 9.99118e-03 9.98830e-05 9.98830e-05 1.58279e-02 4.36386e-02
## 5 1005 OKI 1.18692e-02 9.98740e-05 9.98740e-05 9.98740e-05 3.62593e-02
## 6 1006 OKI 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
## v6 v7 v8 v9 v10 v11
## 1 2.08513e-02 4.72904e-02 1.46362e-02 7.79676e-02 1.24348e-02 1.20796e-02
## 2 4.45759e-02 3.96060e-02 4.60880e-02 4.24541e-02 1.14773e-02 9.93311e-03
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 4.52657e-02 9.98830e-05 9.98830e-05 9.98830e-05 1.31127e-02 4.39800e-04
## 5 4.11295e-02 1.64199e-02 9.98740e-05 9.98740e-05 1.08600e-02 9.98740e-05
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
## v12 v13 v14 v15 v16 v17
## 1 9.99550e-05 0.468524 4.30887e-02 9.99550e-05 6.54457e-02 4.49180e-02
## 2 7.31831e-04 0.316644 3.43821e-02 1.79289e-02 9.94290e-02 1.10625e-02
## 3 9.98021e-05 0.997804 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 0.824120 9.98830e-05 9.98830e-05 3.81714e-02 9.98830e-05
## 5 4.26943e-03 0.862749 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 6 9.98019e-05 0.997804 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
## v18 v19 v20 v21 v22 v23
## 1 9.99550e-05 2.37160e-03 9.99550e-05 1.91372e-02 1.77211e-02 7.33465e-02
## 2 2.15107e-02 1.24573e-02 6.39485e-03 9.99820e-05 1.84864e-02 1.37382e-01
## 3 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 4 9.98830e-05 9.98830e-05 1.60094e-03 9.98830e-05 9.98830e-05 6.53375e-03
## 5 9.98740e-05 9.98740e-05 3.72111e-03 9.98740e-05 9.98740e-05 1.13240e-02
## 6 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05 9.98019e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak23 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_23 <-
c(
"purple4",
"blue",
"green4",
"#FF8C1A",
"green",
"#AE9393",
"#1E90FF",
"yellow2",
"#008080",
"purple",
"#FFFF99",
"magenta",
"navy",
"orangered",
"#B20CC9",
"orchid1",
"coral",
"#75FAFF",
"#B22222",
"goldenrod",
"#F49AC2",
"#77DD77",
"chocolate4"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:23)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_23[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=23.\n LEA inference for k23 with 56,384 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_23) +
expand_limits(y = c(0, 1.5))
Check data - we created vcf file with LD pruning r2<0.01 (Set 1): r2_0.01.vcf
## euro_global/output/snps_sets/neutral.vcf
## euro_global/output/snps_sets/r2_0.01.vcf
## euro_global/output/snps_sets/r2_0.01_b.vcf
## euro_global/output/snps_sets/r2_0.1.vcf
## euro_global/output/snps_sets/r2_0.1_b.vcf
Import the data for r2<0.01
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 19318
## column count: 697
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 19318
## Character matrix gt cols: 697
## skip: 0
## nrows: 19318
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant: 19318
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS GEL GES GRA GRC GRV
## 10 12 12 10 12 12 12 13 10 12 14 12 12 16 12 2 12 11 10 12
## HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG KAN KAT KER KLP KRA KUN LAM
## 12 4 7 12 4 11 4 5 9 12 2 6 12 11 6 12 4 12 4 9
## MAL MAT OKI PAL POL POP QNC RAR REC ROM ROS SER SEV SIC SLO SOC SON SPB SPC SPM
## 12 12 12 11 2 12 11 12 11 4 11 4 12 9 12 12 3 8 6 5
## SPS SSK STS SUF SUU TAI TIK TIR TRE TUA TUH UTS YUN
## 8 12 12 6 6 7 12 4 12 9 12 12 9
Convert format
##
## - number of detected individuals: 688
## - number of detected loci: 19318
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.geno"
##
## - number of detected individuals: 688
## - number of detected loci: 19318
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.removed file, for more informations.
##
##
## - number of detected individuals: 688
## - number of detected loci: 19318
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.lfmm"
PCA for SNP Set 1
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 688
## -L (number of loci) 19318
## -K (number of principal components) 688
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/
## pca result directory: r2_0.01.pca/
## input file: r2_0.01.lfmm
## eigenvalue file: r2_0.01.eigenvalues
## eigenvector file: r2_0.01.eigenvectors
## standard deviation file: r2_0.01.sdev
## projection file: r2_0.01.projections
## pcaProject file: r2_0.01.pcaProject
## number of individuals: 688
## number of loci: 19318
## number of principal components: 688
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 688
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.pca/r2_0.01.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_all.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation
## 1 Franceville Gabon -1.59207 13.53242 Africa GAB
## 2 Antananarivo Madagascar -18.87920 47.50790 Africa ANT
## 3 Diego ville Madagascar -12.27361 49.29372 Africa DGV
## 4 Morondava Madagascar -20.28420 44.27940 Africa MAD
## 5 Vohimasy Madagascar -22.81591 47.75026 Africa VOH
## 6 Dauguet Mauritius -20.18530 57.52154 Africa DAU
## Year Region Subregion order
## 1 2015 Central Africa Africa 72
## 2 2022 East Africa East Africa 76
## 3 2022 East Africa East Africa 77
## 4 2016 East Africa East Africa 78
## 5 2016 & 2017 East Africa East Africa 79
## 6 2022 Indian Ocean Indian Ocean 80
More sample data
# import sample attributes
samples2 <- read.delim(
here("scripts", "RMarkdowns",
"output", "Population_meta_data.txt"
),
head = TRUE
)
samples2<- samples2 |>
dplyr::select(
region, pop
)
# check head of the file
head(samples2)
## region pop
## 1 Central Africa GAB
## 2 East Africa ANT
## 3 East Africa MAD
## 4 East Africa DGV
## 5 East Africa VOH
## 6 Indian Ocean DAU
Merge with sampling_loc
## pop region Pop_City Country Latitude Longitude
## 1 AIZ East Asia Aizuwakamatsu City Japan 37.49240 139.99360
## 2 ALD Southern Europe Durres Albania 41.29704 19.50373
## 3 ALU Eastern Europe Alushta Ukraine 44.68289 34.40368
## 4 ALV Southern Europe Vlore Albania 40.46600 19.48970
## 5 ANT East Africa Antananarivo Madagascar -18.87920 47.50790
## 6 ARM Eastern Europe Ijevan Armenia 40.87971 45.14764
## Continent Year Region Subregion order
## 1 Asia 2008 East Asia NA
## 2 Europe 2018 Southern Europe East Europe 25
## 3 Europe 2021 Eastern Europe East Europe 35
## 4 Europe 2020 Southern Europe East Europe 24
## 5 Africa 2022 East Africa East Africa 76
## 6 Europe 2020 Eastern Europe East Europe 42
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
## [1] 73
Check the regions
## [1] "East Asia" "Southern Europe" "Eastern Europe" "East Africa"
## [5] "West Africa" "North America" "South Asia" "Caribbean"
## [9] "Southeast Asia" "Indian Ocean" "Western Europe" "Central Africa"
## [13] "South America" "North Africa"
Merge
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 ALD 11.3046 3.85130 19.7833 1.687230 -31.9742 -7.03582 2.87450
## 2 ALD 11.3286 4.09234 21.6678 2.240930 -35.6509 -6.67780 1.30074
## 3 ALD 10.9877 2.96010 27.1215 1.722600 -27.0213 -8.93229 4.56509
## 4 ALD 12.5399 3.70106 23.5571 1.923170 -32.8265 -4.32701 -0.54051
## 5 ALD 10.6210 6.13863 18.5964 0.106578 -28.5055 -4.24365 -2.14243
## 6 ALD 10.3700 3.24124 23.0999 0.624742 -34.5679 -8.77115 -1.64540
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## 1 8.76051 -2.390600 -1.1133700 1.409550 -2.432570 2.21439 0.0831294
## 2 10.53680 0.572231 -0.0447769 5.148600 1.885100 6.72702 0.3359080
## 3 12.34000 2.083830 -2.5534900 5.418010 -3.295510 6.81957 -2.2182600
## 4 8.67068 3.930570 -1.8172500 -0.958173 3.130710 1.46826 3.9380100
## 5 6.97025 1.488870 -2.2270600 2.202370 -1.782360 -3.09520 -2.3885500
## 6 8.84329 -1.938790 -1.5921900 6.365010 0.288914 5.53801 -2.6206200
## PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 1.591980 -3.89981 0.846729 -1.512010 1.275050 5.20659 0.355472 6.59573
## 2 2.506320 -1.82679 4.219520 -2.663210 0.722739 6.76211 4.321160 5.56285
## 3 -0.176634 -3.29138 3.994920 -3.303390 -3.444090 4.72517 2.293660 1.81743
## 4 1.702520 1.34122 -0.943790 0.525896 -0.857543 -2.31975 -2.554890 -1.97683
## 5 -0.245045 -4.12400 2.278240 -4.605470 4.697470 1.25214 3.550810 7.89109
## 6 3.673660 -1.73236 1.017960 -2.620330 -0.748756 4.38833 -0.176724 5.28283
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 3.2310000 -3.782110 0.343321 3.452750 -0.397645 8.59238 -6.38829 2.67176
## 2 0.1545930 -0.973830 0.824802 5.002070 -1.803110 7.84339 -2.56195 2.51685
## 3 -0.3103470 0.104312 0.789867 5.413510 -3.317490 11.40880 -3.64877 3.30967
## 4 0.0302859 -0.871619 2.124920 5.503500 0.533652 -1.33150 -3.21115 2.07990
## 5 5.7531100 2.564260 -1.794590 -0.455631 -0.109703 12.61800 7.14574 -8.21520
## 6 2.0990500 -0.298802 -0.988705 7.396710 0.401363 5.97424 -9.19427 -3.69260
## PC31 PC32 PC33 PC34 PC35 PC36 PC37 PC38
## 1 6.86863 -1.089500 -4.44339 6.67840 -3.42491 -4.82034 8.93957 -4.88268
## 2 7.74545 2.853570 -3.56519 4.42324 -1.18201 -3.85741 4.39299 -6.69470
## 3 8.60304 6.357500 -7.46373 4.15771 -4.83301 -2.46457 7.54084 -8.38108
## 4 -4.69375 -0.207922 2.10898 2.65389 1.70503 5.73929 -4.27603 -3.66764
## 5 -6.60411 4.734720 -12.04430 4.07524 -3.22002 -2.26138 4.88504 -6.55756
## 6 4.72237 -0.926283 -3.27779 5.47676 -1.57894 -8.10418 7.07028 7.59312
## PC39 PC40 PC41 PC42 PC43 PC44 PC45 PC46
## 1 -6.78503 -4.42121 3.59483 -2.665010 -3.630720 15.46030 -3.96144 -6.4089000
## 2 -7.26412 -5.50867 7.42830 -2.154760 -0.773832 7.63320 -1.18746 -6.5392000
## 3 -5.62695 -5.01390 8.26299 1.203870 0.940659 1.56725 1.96505 0.5932230
## 4 -8.27641 -2.11220 7.04648 -2.259360 3.718930 1.64267 -7.67578 -0.5220810
## 5 -2.90173 0.32610 -3.06623 -0.994856 -8.462040 9.02437 1.05418 0.0444795
## 6 2.13255 8.45184 -12.27810 3.350630 0.377129 13.13020 -2.84936 -3.6098600
## PC47 PC48 PC49 PC50 PC51 PC52 PC53
## 1 -3.71097 -1.082220 -2.766790 7.672670 12.30180 -3.1054600 1.924130
## 2 -2.01765 1.189410 -5.822020 4.647830 4.41900 0.0153929 -0.433726
## 3 3.39743 0.471181 -5.686080 0.208139 1.09238 0.1296940 7.671860
## 4 7.34806 0.639311 -7.172720 -1.552730 1.99029 -1.8731700 -3.060680
## 5 -5.90926 4.810760 1.229390 1.441610 6.24433 -0.0726317 0.420500
## 6 -4.74439 8.944300 -0.487939 -3.048420 2.38544 0.9313740 -7.630770
## PC54 PC55 PC56 PC57 PC58 PC59 PC60
## 1 -0.683836 3.345480 17.363200 -1.485730 -0.0259309 1.56506 1.23083
## 2 -4.033930 2.998030 13.485500 -4.554240 2.5985500 3.97646 -2.31647
## 3 -3.867050 -0.788444 11.591500 -0.836219 2.9079800 4.70301 1.73630
## 4 4.549290 1.211220 -0.138582 -1.870110 -0.9434830 -12.62790 -5.61055
## 5 0.181434 4.545250 3.638140 -0.995323 -2.4282200 -3.61029 8.37504
## 6 -5.363230 -2.720780 -2.805080 5.570260 -0.5126940 8.06162 3.68012
## PC61 PC62 PC63 PC64 PC65 PC66 PC67
## 1 12.36460 8.14968 4.424100 0.675101 -4.946100 0.666866 -0.539953
## 2 7.21885 5.87628 2.239920 -5.329600 -4.809330 0.698679 -3.867710
## 3 10.40760 5.37401 5.918490 -2.324100 -5.898300 -4.353950 -3.038200
## 4 3.67099 2.43420 0.184676 2.806220 -6.231890 -3.045850 1.087490
## 5 4.05643 1.76671 6.028750 1.079830 -0.638482 -0.808931 -8.573490
## 6 -12.93130 -12.34680 -1.453650 -9.402500 25.534300 2.088730 3.960290
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 3.626120 3.15212 3.178980 -0.322655 8.34466 -0.0736454 3.20536
## 2 6.192960 -1.63845 5.300660 -6.972940 10.38190 -3.4281300 2.21707
## 3 0.790238 -1.59328 10.640200 -4.497410 7.29813 -3.0353400 5.61416
## 4 -0.308131 -3.43526 -4.851840 -4.036010 8.68017 -4.5135500 -4.00738
## 5 4.745270 6.12391 0.487623 -5.248270 2.22806 4.2850400 -1.88139
## 6 5.207100 4.08905 -9.177160 14.790700 -10.49410 -14.7545000 -1.28016
## PC75 PC76 PC77 PC78 PC79 PC80 PC81
## 1 1.560740 -1.1318000 3.079280 -2.294070 0.330213 -0.849971 0.23262
## 2 2.419960 -3.5366100 -2.952500 -0.300429 1.180380 -3.288850 2.46454
## 3 4.474900 -3.2787500 2.369970 -4.133800 6.488080 -0.821653 -1.46916
## 4 -2.897600 0.0746116 -6.816260 -1.939580 4.969250 -7.470980 1.76947
## 5 -0.452728 -3.6357600 -0.414089 -4.228540 2.659440 -3.243120 -6.26291
## 6 3.161660 7.8113200 21.747800 2.844890 -10.529600 -1.050340 -10.94520
## PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 1 3.676170 -2.114840 3.300130 9.77398 -0.501383 4.24391 2.821420
## 2 -2.625270 5.738870 7.397860 7.17553 -3.859310 4.76020 -4.966830
## 3 1.386140 4.294830 10.229400 1.45990 -6.178910 -2.86423 -3.815870
## 4 6.702490 2.631350 -0.272547 4.98237 -2.535640 2.31096 -3.023310
## 5 0.504438 0.151063 -0.428986 6.41210 4.387500 4.84532 -0.855451
## 6 -10.615300 -3.035320 -1.516010 -3.19946 -3.781570 -2.63837 -1.979470
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## 1 0.881157 -1.591860 5.38173 2.853400 2.381060 -1.304460 -5.221110
## 2 -4.828930 -4.081890 -1.09502 -0.126301 3.998150 5.205870 1.319620
## 3 -6.985200 -5.026990 -4.65174 -5.034190 0.994824 12.556600 4.545030
## 4 -5.524060 0.811997 1.02417 -6.209200 -0.934143 3.319320 -1.919300
## 5 -1.640050 1.103730 3.49897 -2.529840 2.613670 3.429880 -5.707880
## 6 -9.966890 -12.861300 5.42910 0.589571 2.157570 0.951197 -0.826276
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 1.82553 2.370550 1.8149700 -0.9901620 -2.541920 -0.679209 -3.58714
## 2 -2.39237 1.638400 4.4738900 2.6497300 1.116190 -1.692700 1.95229
## 3 -5.22245 10.461600 -0.8455530 0.0263679 0.505576 -7.371870 1.94227
## 4 3.89914 4.791710 1.3550500 1.6973700 -1.274580 5.993940 -5.88560
## 5 -2.82533 0.932514 5.2839700 7.5794400 3.857620 1.928860 1.70852
## 6 -4.32066 -0.115444 0.0519455 -3.6473500 0.416684 2.038670 3.52975
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 -1.242230 1.37588 -0.282432 1.009850 4.5056900 3.105340 -3.378790 3.24386
## 2 -0.388432 -4.76404 0.346802 -2.296200 -5.0945900 0.967458 4.496360 -1.54846
## 3 5.974930 4.55647 -1.570030 -0.563826 -0.1233150 2.944420 -1.330780 -6.44084
## 4 0.486571 -5.57529 1.298180 -5.440210 0.4837610 3.749750 0.621117 -6.45818
## 5 5.042380 -3.03734 12.192400 3.364350 0.0474162 1.490450 -3.737110 6.32788
## 6 -4.821870 -4.92392 -1.975290 2.803510 6.6819200 4.907960 3.215450 -5.80926
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 1.831410 -3.56423 -0.829310 -6.09407 -1.41875 3.763020 -1.236830
## 2 -0.123996 2.05858 2.182990 -5.22162 -2.11596 -0.160551 1.039900
## 3 -2.496910 -2.32711 5.238510 -2.03523 -2.69885 7.127050 0.156205
## 4 1.704910 10.35100 -0.742679 7.69977 -1.23606 -3.155350 11.828700
## 5 5.810880 7.21787 3.713050 -4.69280 3.66361 -2.726250 -5.317630
## 6 8.105620 -10.90690 -6.465960 -1.26854 -1.50106 -5.159560 8.907690
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## 1 -6.16092000 1.699850 0.186924 -4.391350 3.11780 -0.422223 4.95131
## 2 -2.08942000 -1.546120 3.111010 -0.949337 -1.59902 1.958100 1.83929
## 3 -0.34188400 0.749496 2.910160 1.304050 4.03203 -1.654230 7.92005
## 4 -2.17334000 -4.921240 1.410030 -7.839490 4.07305 -2.265450 -8.38001
## 5 0.00684361 -3.118290 5.235230 6.032060 7.83041 -1.207320 -0.43200
## 6 4.07670000 5.702270 -3.817560 6.174360 -4.08069 -2.418970 -0.97293
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## 1 0.185832 0.261899 -6.282550 5.91919 6.23795 7.0226900 2.535890
## 2 -0.580664 5.848520 -1.924010 8.06666 -2.68879 -1.5571300 0.298089
## 3 7.351420 6.280290 -0.995556 7.52393 -5.38622 -0.0920857 -5.297550
## 4 -5.969310 -6.378260 -6.014520 -4.39762 3.20391 7.1223200 4.967480
## 5 -10.860400 -6.738010 0.320248 3.82667 -1.24171 -3.7832800 5.202910
## 6 -1.569440 5.852960 -0.186841 3.44772 -1.20364 -0.4914640 -0.200503
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## 1 -7.091640 -4.09787 -0.9431710 3.730210 -0.179847 -2.858830 1.118670
## 2 -0.353264 -6.52274 0.0324326 3.545020 0.543437 -0.982884 -3.196480
## 3 1.967810 -6.04081 -2.9640000 5.207930 3.043190 -2.378460 -0.665147
## 4 -2.844440 1.84523 2.1313300 1.505590 6.049240 -3.552260 -10.050800
## 5 -3.937810 9.15740 -4.9289600 -0.265273 0.282665 -2.600760 -0.921349
## 6 -6.911200 1.92793 1.3839200 -0.159293 -4.199880 -3.750490 -2.533260
## PC139 PC140 PC141 PC142 PC143 PC144 PC145 PC146
## 1 4.263590 6.099980 3.22012 -8.334070 1.299490 7.02878 0.628793 1.854180
## 2 -0.703574 7.821730 -2.22026 -1.010210 1.234940 -3.78188 2.323930 -3.586940
## 3 -2.674500 9.179140 -1.15101 -2.484550 -7.111730 -1.52159 6.152850 -8.405590
## 4 -5.241910 3.592440 5.13412 0.565826 -1.087230 -4.42264 1.112270 -0.866679
## 5 -4.036760 0.360688 -2.64851 -1.479770 3.857360 6.19061 -1.356600 2.374540
## 6 5.005410 -0.217569 3.18881 -5.517830 0.507129 -2.31484 5.017880 1.484370
## PC147 PC148 PC149 PC150 PC151 PC152 PC153 PC154
## 1 -3.169590 3.90917 5.48849 5.5814600 2.969340 -1.10906 0.375404 -1.01594
## 2 0.941444 -1.69402 -1.08083 -0.8090390 5.699030 1.34852 -4.540270 -2.32031
## 3 -3.784180 -5.59783 -1.90505 -1.8694100 2.849310 5.91475 -3.382540 -2.34757
## 4 -3.553470 -3.55506 -2.91962 -0.0703231 0.377022 1.92900 1.305020 -7.54519
## 5 12.438800 6.40531 -1.03165 3.5773900 4.051250 0.97918 0.427298 6.82716
## 6 -7.824840 -2.85133 2.15418 5.0165100 -2.573520 -3.33185 1.175740 5.80516
## PC155 PC156 PC157 PC158 PC159 PC160 PC161 PC162
## 1 -3.194590 6.344740 -4.716410 5.56698 -0.229742 -1.12558 -2.78257 -1.1763900
## 2 -2.299270 4.739130 -3.311780 -3.87053 -5.494820 -1.99021 2.77757 -6.3429000
## 3 0.131539 -0.691950 2.945900 -9.90679 0.222635 -4.41852 4.96841 -0.5344060
## 4 6.693760 -0.412623 -1.045450 2.78292 0.401828 -1.29800 -2.12093 0.0229272
## 5 0.645226 6.898780 -0.239179 -1.87662 -7.712930 -8.70063 -7.45579 3.9954700
## 6 -8.188180 -0.502378 -0.606133 8.85862 -6.430770 3.95163 3.67135 4.9203100
## PC163 PC164 PC165 PC166 PC167 PC168 PC169
## 1 2.407920 0.504277 -6.207460 -1.821530 -1.46783000 -2.2010600 0.284804
## 2 0.160678 9.304710 -2.631630 -5.545960 -0.14424700 -5.9512900 4.902430
## 3 1.266740 0.489838 -7.443820 0.357423 -0.00106332 -4.8939300 0.550162
## 4 1.272300 -9.188160 5.111440 -3.451410 -3.36530000 -1.3892100 -3.437740
## 5 -5.355580 3.933350 0.553361 1.089580 4.15026000 -0.0174637 0.393706
## 6 2.439100 1.310280 3.313470 5.025000 3.10215000 1.9033900 1.265480
## PC170 PC171 PC172 PC173 PC174 PC175 PC176 PC177
## 1 -2.06408 0.249062 3.206130 2.51127 -1.57129 2.806100 2.328480 0.940548
## 2 -2.31194 -3.141630 0.382582 2.01567 5.37382 0.446184 0.977171 1.229680
## 3 -1.36361 -0.628543 -3.132420 5.85005 -1.13194 -4.795800 -0.509625 -8.482140
## 4 9.70610 0.776337 10.806700 -3.03668 3.68210 2.316360 -1.512890 1.518280
## 5 2.00749 10.847100 4.886560 -2.65903 -5.96778 -5.692680 4.891790 -5.440430
## 6 3.31191 -1.190150 3.899690 -2.94469 -2.81133 -2.711750 -2.510280 3.611460
## PC178 PC179 PC180 PC181 PC182 PC183 PC184
## 1 -0.00200687 -4.65122 2.3894400 -7.303210 -2.759210 2.754250 5.157660
## 2 -0.74188400 4.72168 -1.3900200 -8.020900 -0.766446 0.408351 3.968530
## 3 1.17209000 4.64376 -4.5449300 -1.867810 -5.384910 7.567070 -7.286060
## 4 8.23867000 -7.06576 2.5939400 -2.129160 1.646700 -3.702110 -0.560026
## 5 -3.79358000 6.30434 -3.0662400 4.405560 -3.986570 -2.528300 3.393280
## 6 -0.15949300 0.36457 -0.0979605 -0.803125 -3.550490 1.204050 5.197190
## PC185 PC186 PC187 PC188 PC189 PC190 PC191
## 1 -5.122260 -2.875810 1.66973 -2.690040 -2.36216 -7.727470 3.27333
## 2 -5.199090 -1.588930 4.85938 5.565460 2.97993 -1.850640 -1.77663
## 3 3.780100 0.199628 1.42551 0.608429 -1.16493 1.421940 -0.28420
## 4 -11.393300 -8.025690 -3.60762 6.154500 -3.88123 -0.491429 4.93060
## 5 -0.683104 0.265958 -7.64458 0.818755 4.07498 -6.294480 -7.87804
## 6 3.152440 0.569911 -1.26830 -0.904214 -0.70014 -1.357030 -2.88737
## PC192 PC193 PC194 PC195 PC196 PC197 PC198
## 1 -10.970500 3.395710 2.02413 -1.57175 -3.547260 1.39563 -3.6125500
## 2 -1.240410 2.429520 -3.98737 -2.97395 1.090970 -1.02960 -0.0936227
## 3 2.198030 1.188300 1.26478 4.66170 0.524950 -2.12740 6.8943600
## 4 -0.355432 -0.614295 3.02340 -1.12641 -7.986670 -2.69594 -2.7320400
## 5 2.761850 3.336300 1.12680 -3.44725 0.689305 -1.93704 1.8109300
## 6 1.778270 0.410245 -0.27013 1.71417 -1.849740 -4.71803 0.0304301
## PC199 PC200 PC201 PC202 PC203 PC204 PC205
## 1 4.3506500 -0.1842840 -2.35063 2.435380 0.592889 -1.75159 -3.861130
## 2 -3.7102100 1.1534600 2.31505 3.295890 4.785610 -6.55696 -4.182160
## 3 -1.8342000 -0.0591787 1.43649 -1.125470 -1.176830 -5.18465 0.556054
## 4 5.9963000 4.2321100 1.46582 0.689867 -4.254750 6.18998 4.144820
## 5 -6.4662400 3.6972300 -3.45097 -5.285000 -5.222300 -5.51265 2.109890
## 6 0.0379097 1.2963200 -5.46168 -2.748510 7.776450 4.47502 4.427750
## PC206 PC207 PC208 PC209 PC210 PC211 PC212 PC213
## 1 1.29632 6.80115 3.724990 -5.679960 7.245660 1.430160 -3.92452 2.438540
## 2 -3.83494 1.33549 -3.521190 -1.570180 6.505040 -0.768251 -1.45241 2.828640
## 3 -5.16227 1.28890 1.088420 -2.014510 -1.789410 6.007320 -1.91646 -0.565721
## 4 0.99046 -1.30097 -2.315050 -5.949760 6.741770 -2.065300 2.48424 -1.596290
## 5 -1.61388 2.94504 -0.124489 -0.257321 11.537800 0.399082 -6.19094 -6.520800
## 6 1.93542 -4.99764 4.708590 -3.235450 0.620947 0.889299 -2.53503 0.928740
## PC214 PC215 PC216 PC217 PC218 PC219 PC220 PC221
## 1 4.028970 -4.886490 2.92810 5.481230 -3.43628 -8.39159 0.176600 -0.198451
## 2 -0.305079 -1.914920 -0.79662 -1.815350 -1.09437 -2.52244 -3.347810 -1.306930
## 3 -2.319340 1.040040 -1.62346 -3.834280 0.84389 1.46661 1.267470 1.828480
## 4 -0.255899 0.965714 7.46661 3.159170 5.80024 4.63414 -6.449860 1.305460
## 5 0.503550 7.551410 -2.29599 5.001530 4.75892 8.14829 4.804990 -2.737450
## 6 -3.560970 -0.402145 -2.13385 0.128978 -2.61625 4.07387 0.670273 0.929313
## PC222 PC223 PC224 PC225 PC226 PC227 PC228
## 1 3.137110 -5.921780 -3.046360 -1.67911 1.24058 -7.4721300 -0.48883500
## 2 5.360180 4.655510 -0.385307 -1.37559 -1.85719 -5.6091900 -0.26706700
## 3 -2.584560 -0.296809 5.357860 4.38550 -2.74253 -1.7924800 0.00585376
## 4 -0.462546 5.945220 1.681390 2.55162 -2.71198 -0.0619051 0.59627000
## 5 5.466080 -3.546580 8.015470 5.28112 1.35408 -0.1151420 -0.39073600
## 6 -3.499260 1.654170 -0.164668 4.23481 -1.47545 3.8106900 -0.67177800
## PC229 PC230 PC231 PC232 PC233 PC234 PC235 PC236
## 1 -0.484416 -6.591900 7.10378 1.20679 -0.953984 1.795180 -3.067130 -0.185464
## 2 5.944490 0.946733 -3.46878 -1.51164 0.568447 2.588420 -0.633609 6.386770
## 3 0.472092 2.519760 1.65108 -1.18434 -1.810870 2.063490 -0.997746 -1.331900
## 4 5.414530 -4.616250 -5.22295 3.60925 -1.318590 -5.395360 0.213695 -0.929498
## 5 1.747560 -3.011750 -3.00410 0.82996 0.798574 0.862193 2.458870 -4.114840
## 6 -1.438900 4.061510 -1.97774 6.04377 2.038440 -1.225040 -0.970757 2.346040
## PC237 PC238 PC239 PC240 PC241 PC242 PC243
## 1 -2.60627 1.724900 -3.233350 -1.8882900 -2.71458 -1.530650 -1.79883
## 2 -1.67193 -5.356220 -3.979290 -4.1170300 -2.96122 5.222450 4.06640
## 3 2.33019 -0.447266 -3.185630 0.0358097 8.08531 0.866774 -1.22115
## 4 -11.29090 6.735320 5.308680 -0.9669070 -3.06053 1.807940 4.24437
## 5 1.17821 3.636750 0.234708 -6.4126800 -5.04591 -1.722030 6.87473
## 6 -1.91458 -5.204140 2.415640 -1.8898000 -2.21064 -2.173030 -1.47902
## PC244 PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 -0.141661 -3.046470 -5.59169 2.374930 3.7825000 7.28198 -1.39082 -2.122820
## 2 -2.937560 -3.264400 1.04026 1.934550 -4.5539700 -3.65782 -2.21004 1.590630
## 3 -0.158989 0.424500 2.31887 -0.986673 -0.6495400 -4.43763 1.94750 -0.639691
## 4 -0.387815 -4.758520 -5.30704 -0.800842 -3.1182100 -1.80576 -4.37313 -1.730690
## 5 -0.443691 2.042780 -4.16313 -2.437480 -1.8216800 1.89389 -3.22134 0.903353
## 6 2.181070 -0.869642 1.58744 3.757840 0.0782487 -2.07095 -5.05006 1.444780
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 -3.1109100 -0.0989007 -3.984490 -0.380065 0.773690 5.081620 3.796560
## 2 -0.0900249 -3.1492800 0.945347 -0.316746 2.787110 4.558370 -0.148475
## 3 1.8735200 5.0584200 -0.611032 -1.920500 3.709800 -0.484981 -5.442160
## 4 -4.2312100 -7.2820700 0.406724 -0.858350 4.568340 -1.386100 2.992070
## 5 8.4939100 4.2626500 -0.487946 -0.449387 -1.182860 -0.322571 6.044300
## 6 -2.1517800 0.5775150 -0.378945 2.891990 0.450424 0.249205 0.817690
## PC259 PC260 PC261 PC262 PC263 PC264 PC265 PC266
## 1 4.554870 -0.180526 3.435920 0.225784 1.60653 3.76099 2.871990 -1.302680
## 2 1.288960 -3.907130 -1.382680 2.555630 -6.02580 1.71128 4.870660 7.403940
## 3 4.021820 -2.085190 4.568090 -1.944590 -2.48828 -3.21620 1.575480 -0.828938
## 4 -0.230136 3.909410 -3.996590 1.347380 3.39100 -2.32004 3.366520 -3.100750
## 5 -2.199270 -6.744820 -6.012390 -0.999186 3.85779 3.65402 -5.079180 0.360558
## 6 0.529356 -3.363430 -0.787424 0.286810 -1.05157 -1.30635 -0.712065 1.261690
## PC267 PC268 PC269 PC270 PC271 PC272 PC273
## 1 -0.704708 4.322350 -1.1258300 -1.179440 0.675343 3.01902 1.970360
## 2 5.160620 3.635520 -2.4468800 -0.493435 3.050280 -1.56081 -8.733250
## 3 -2.105030 -0.509731 -0.0109466 -1.482940 -2.779740 -6.08347 1.363990
## 4 4.917970 4.606890 3.2853500 -1.722580 -1.324390 1.72342 5.424210
## 5 -2.559020 1.384370 -2.3572400 1.784000 3.650560 -2.38102 -2.517820
## 6 2.473910 0.508406 2.7605000 -3.296760 0.279406 1.45649 0.701155
## PC274 PC275 PC276 PC277 PC278 PC279 PC280
## 1 1.076720 -6.456570 -4.3610100 2.232230 3.7653100 2.5232200 -2.577250
## 2 -2.348860 1.316240 -3.4896600 1.010140 3.5050800 1.3732900 1.907470
## 3 1.627800 -2.196950 5.0531400 3.953960 0.1296010 0.7704000 -1.831710
## 4 0.922302 2.941030 0.0113377 -2.077920 0.0954494 4.3804500 6.568490
## 5 -1.010390 0.577939 1.6901900 -1.417940 -0.2320480 2.0787400 3.394350
## 6 1.942790 -0.784017 1.2404900 0.799635 -1.3125100 -0.0911067 0.593806
## PC281 PC282 PC283 PC284 PC285 PC286 PC287 PC288
## 1 -1.167470 -7.32710 -1.757210 6.263120 6.68303 -2.738220 -1.282220 9.083170
## 2 6.050040 5.06460 1.867210 2.773310 3.04143 1.122050 2.125570 0.786617
## 3 -3.118240 2.85255 -1.304560 -1.839270 -3.27678 2.887690 4.543650 -4.966050
## 4 -0.755247 -2.41956 -0.177232 -0.785872 2.57769 -4.062370 -2.953830 0.331381
## 5 -0.167794 -5.00791 -0.104553 -1.051420 1.41791 -0.746397 -1.887730 0.908154
## 6 -0.419569 -2.21488 2.496790 -1.429140 -1.06806 -1.826130 0.880564 2.172490
## PC289 PC290 PC291 PC292 PC293 PC294 PC295
## 1 -2.24602 3.25972 3.101570 0.906306 3.789420 -0.418502 -0.835618
## 2 -1.97198 3.50750 0.107420 -1.740950 -1.272470 -2.388470 0.110258
## 3 3.27843 -6.82619 1.227160 -0.963813 -0.240709 0.520258 5.565670
## 4 4.31911 -2.41201 -0.813299 -2.981570 4.501630 2.012100 -0.720453
## 5 -2.20043 7.65197 -0.123663 0.949846 5.226230 -1.838180 5.166070
## 6 -1.55086 1.81220 2.017660 -0.907038 1.868770 -2.701710 3.248320
## PC296 PC297 PC298 PC299 PC300 PC301 PC302
## 1 4.8994000 -0.354399 -2.316600 1.198970 7.127490 -6.4063800 -9.698180
## 2 4.2534900 -1.765790 4.649910 -0.824233 -6.574470 -5.6218900 3.017050
## 3 1.0221600 1.172740 -0.620871 1.926090 1.182940 1.2398100 -1.562080
## 4 3.5535700 2.434290 2.813040 -0.250536 -2.751430 0.4080740 4.008350
## 5 -3.9412800 7.063680 1.152600 1.067250 0.957400 6.0361900 -5.040300
## 6 0.0479087 -0.334840 1.281160 -1.611780 0.510062 0.0803322 0.585129
## PC303 PC304 PC305 PC306 PC307 PC308 PC309
## 1 -1.244670 -0.0359111 3.494620 5.136460 -1.0652500 -5.897530 -2.196240
## 2 0.880336 -0.2842230 -5.833970 3.408550 2.8466200 -0.746355 3.100230
## 3 -3.920340 -2.6214400 5.745940 0.144089 -0.0390333 2.352300 0.797683
## 4 -3.766600 2.7632300 -2.127930 -1.375800 6.4100900 -4.232740 5.516660
## 5 2.002890 6.0349300 -2.486370 2.179430 9.5686900 -1.446080 -2.554080
## 6 2.285350 -0.0504606 -0.860408 1.275890 -0.3145560 0.263111 3.049160
## PC310 PC311 PC312 PC313 PC314 PC315 PC316
## 1 -4.529300 -0.596955 -6.022760 -1.98718 -3.854230 -4.458500 4.820600
## 2 -1.279880 -2.394470 -2.733360 2.29759 6.911940 2.783710 3.275130
## 3 1.875160 -0.843038 -3.966130 -3.25928 -0.861628 -1.571350 4.746350
## 4 -1.148870 -1.579430 2.904310 2.57539 -1.109820 -0.791526 -0.364149
## 5 -5.018320 1.121770 7.693330 -1.24061 0.587698 -0.272952 -1.099740
## 6 0.552704 1.666260 0.578342 -1.21995 1.913870 1.801810 -0.304175
## PC317 PC318 PC319 PC320 PC321 PC322 PC323
## 1 2.317450 -3.031970 -3.967150 -3.718840 -1.18691 3.754420 -2.247830
## 2 6.108240 0.961631 -1.638030 1.563570 2.16924 -3.449600 4.716840
## 3 1.737610 0.195350 4.031240 -1.256110 -4.96456 -0.360506 0.182904
## 4 -1.039470 0.732611 -1.700800 1.173080 4.33882 3.042740 -2.074310
## 5 -2.445130 -2.092750 -3.781580 7.753250 1.51262 -1.001810 0.216068
## 6 0.484572 0.124960 0.254165 -0.590667 -1.23423 1.284490 1.421910
## PC324 PC325 PC326 PC327 PC328 PC329 PC330 PC331
## 1 -2.407790 3.52872 3.019310 -5.046920 1.570320 3.542260 -4.17224 -0.764133
## 2 2.885370 1.60306 5.788690 -0.411278 -5.938110 -3.153700 1.52610 8.171960
## 3 3.217630 -2.05684 2.295780 2.350170 7.088650 -2.384220 -6.56339 3.529360
## 4 3.601730 3.30243 0.271778 -0.458454 -0.321689 -1.276820 10.04940 -5.558870
## 5 -1.756110 7.21950 -7.370730 0.722412 -1.418190 4.488210 5.81059 -1.090410
## 6 -0.140333 -2.03585 -0.620517 0.324934 2.739080 -0.184163 -1.30837 -1.856670
## PC332 PC333 PC334 PC335 PC336 PC337 PC338
## 1 2.403320 -3.4056100 3.337870 4.087050 -0.919898 1.374020 -0.956720
## 2 0.186044 -2.6618000 -0.863452 -0.624972 2.173400 1.361700 -3.670690
## 3 -2.177060 -0.5525700 -3.144710 2.214470 -2.243890 -1.964570 3.324350
## 4 2.962830 1.4213600 -1.762620 1.362520 4.203550 -1.478030 -0.558561
## 5 0.588064 6.0416500 5.217510 -3.452600 1.496190 0.221104 -5.046460
## 6 -0.592151 -0.0223541 -1.813180 1.348970 -0.975891 -1.191660 -2.384790
## PC339 PC340 PC341 PC342 PC343 PC344 PC345
## 1 -0.880226 -1.21509000 3.04682 -2.4142100 -5.935350 -1.983460 -4.654590
## 2 -0.605932 -0.00555504 3.36160 -2.1675600 1.394030 -1.566820 2.968850
## 3 4.231470 -4.78404000 3.28742 -0.0250219 0.753464 -4.882620 -1.118300
## 4 -2.447590 -4.44271000 -1.14934 8.4456400 2.939360 -0.727864 -1.694790
## 5 -2.852770 -1.32246000 1.09861 -0.1515270 6.350420 7.626440 0.222928
## 6 1.691910 2.79154000 -3.56493 4.8764800 1.760920 1.908600 0.218869
## PC346 PC347 PC348 PC349 PC350 PC351 PC352
## 1 -0.580142 -5.47046 -2.364440 1.107250 4.27404000 -0.412252 3.67231
## 2 1.987670 4.96409 11.246600 -1.562750 5.19936000 -0.521341 -2.80807
## 3 -4.398510 -1.68173 -0.847626 0.085437 0.46251200 -2.057560 2.52434
## 4 0.857038 2.08038 1.765370 -1.220690 -1.86166000 3.361190 1.92769
## 5 -2.248230 -3.16584 -7.307590 -0.262214 -5.46398000 1.412240 1.14520
## 6 2.221290 2.57069 1.317720 0.953769 -0.00655931 0.540784 1.76074
## PC353 PC354 PC355 PC356 PC357 PC358 PC359
## 1 1.871810 -6.511980 -0.661564 3.378200 -4.47432 0.941510 -2.664150
## 2 0.063551 -2.677530 -1.001650 -4.270190 -2.68414 -0.182748 0.708255
## 3 -1.812180 6.216740 3.139380 4.413630 -1.97647 5.228330 2.181090
## 4 -0.700273 -5.511250 -4.511440 1.528640 -2.73488 0.625332 -3.168350
## 5 -4.372240 -0.261217 0.172529 4.927270 1.26310 -3.894150 3.667970
## 6 -1.519310 4.241000 -1.619820 0.725635 -1.29752 -0.255093 -0.706519
## PC360 PC361 PC362 PC363 PC364 PC365 PC366
## 1 2.231740 4.099790 -8.543030 -1.588750 -1.157290 -3.5193500 -2.105260
## 2 1.397130 1.526930 -5.251060 5.153830 -5.638690 -4.5892300 7.809180
## 3 -5.802150 -0.321080 1.538390 -1.007440 -0.692839 2.2761400 3.638130
## 4 -0.785163 0.439312 -4.591540 6.905630 2.763150 -3.7990900 -4.649150
## 5 3.697240 1.214710 -0.744555 -0.815993 5.262620 2.7329000 -5.562210
## 6 -0.707137 -1.655810 0.165163 1.824010 0.922399 0.0216574 0.838715
## PC367 PC368 PC369 PC370 PC371 PC372 PC373
## 1 -1.6720900 4.487680 -4.314740 -3.528740 2.225810 -4.667440 1.028090
## 2 0.1706070 0.427944 -8.202190 1.707700 -0.317252 2.776320 4.323410
## 3 1.1027500 1.074630 0.550941 -1.084960 0.709983 -1.953010 2.460610
## 4 -1.0847500 -0.633538 2.947630 1.980230 -0.123283 3.328190 3.106650
## 5 4.8595200 -0.519760 -2.005060 3.654230 2.527530 0.186395 2.013940
## 6 -0.0637026 0.906861 -0.779790 -0.610078 -2.564690 -0.551038 0.510095
## PC374 PC375 PC376 PC377 PC378 PC379 PC380
## 1 7.02319 1.738220 6.596490 -0.8094320 -2.618450 -3.024410 -1.680290
## 2 -7.89084 -0.738554 -1.583800 -1.5258300 -3.943280 -1.357440 -6.709730
## 3 4.21677 -2.482110 -4.478720 -0.0608105 0.197405 0.299719 1.448730
## 4 -3.33134 2.823940 -4.081030 -2.2697700 -3.363470 -1.840450 4.161030
## 5 2.88541 0.485885 0.580940 -1.7494500 6.343700 2.654310 4.209810
## 6 -5.10864 1.730910 0.818387 -0.3118950 -0.834581 -0.560287 0.823605
## PC381 PC382 PC383 PC384 PC385 PC386 PC387
## 1 1.224140 -4.46823 4.560820 0.336982 3.222050 -0.128295 -3.128460
## 2 2.085980 -2.11413 -3.411630 6.556230 3.376970 0.324062 2.453470
## 3 4.953880 2.79445 2.917030 -2.244600 -0.713586 -0.365991 0.134078
## 4 0.742917 -3.21043 -0.491023 5.074570 0.285338 2.363070 -4.858370
## 5 -8.116330 4.33748 1.892020 -4.607370 -0.826007 2.118930 -1.398930
## 6 1.033700 -1.65589 -1.777830 -3.430340 -0.767556 1.481900 2.696470
## PC388 PC389 PC390 PC391 PC392 PC393 PC394
## 1 0.939928 0.933622 0.669828 2.5207300 2.17117 -2.860700 6.196000
## 2 -0.037260 1.490700 0.419819 0.1439770 -4.44610 -2.577600 -6.390070
## 3 -3.199150 -5.518490 3.181760 -2.8136800 1.66992 2.044430 -0.187200
## 4 1.497960 -0.326636 -3.596580 2.3397100 3.57688 1.227180 0.812500
## 5 6.972460 -1.416870 -3.069460 -2.8932200 -5.67699 -4.580810 2.630930
## 6 0.110215 -1.793140 0.266512 0.0441029 2.89785 -0.887878 -0.778045
## PC395 PC396 PC397 PC398 PC399 PC400 PC401
## 1 -2.982830 -3.082900 -1.915160 0.805651 0.620796 4.94050 2.0283400
## 2 -3.178690 3.264820 -0.390678 5.573150 -1.369010 2.10913 -1.7665300
## 3 4.499490 -1.544360 -0.836570 4.431710 -5.337080 -4.37793 0.2483010
## 4 5.035120 1.209630 0.238963 -3.609800 5.509770 -1.35605 0.7613310
## 5 -0.591322 -2.202240 -2.334450 -0.538381 0.351640 -9.57576 -0.0717029
## 6 0.426598 -0.399882 2.091850 2.806660 -0.307129 1.51197 0.2879930
## PC402 PC403 PC404 PC405 PC406 PC407 PC408 PC409
## 1 -4.114920 -1.97979 -0.808980 -1.91079 4.511750 -3.82357 -0.799932 -0.149239
## 2 -0.183011 2.31346 3.158090 -6.78478 2.630460 -6.62612 -7.284580 -1.642940
## 3 1.537450 5.09322 -0.578961 5.01701 -6.217660 2.83876 -1.389460 -3.389160
## 4 -2.623490 2.60056 0.689956 5.17085 -0.133193 -1.47182 -2.351500 2.767990
## 5 -2.331520 -6.66658 2.385440 8.46534 -7.742310 2.81319 3.648790 -2.183510
## 6 2.455320 1.00331 4.438930 1.72310 0.462323 -1.11461 -1.513510 0.549190
## PC410 PC411 PC412 PC413 PC414 PC415 PC416
## 1 1.180210 7.201860 5.997450 0.796744 6.778560 -6.71034 3.086720
## 2 -2.246620 3.939280 1.488150 -3.055610 -2.258990 6.62703 -2.355380
## 3 1.748630 -0.637145 1.767430 -1.559500 5.161070 -2.28945 -2.964570
## 4 -0.186485 2.244710 -4.674540 4.427060 3.618180 3.84153 2.612780
## 5 -6.562460 3.455780 -0.511899 -1.848520 0.333890 0.88684 -8.437590
## 6 1.467220 1.914170 -0.209509 -3.444480 0.877921 -1.44130 -0.203141
## PC417 PC418 PC419 PC420 PC421 PC422 PC423
## 1 -5.270000 2.669880 -2.737900 0.0972643 -1.743620 -1.627760 5.633320
## 2 5.311830 -1.166070 -5.028760 2.5614900 0.544232 5.968320 -0.237397
## 3 -1.070970 6.167360 -1.685960 0.2800240 -0.776307 -2.840230 3.627820
## 4 -1.219700 -7.430310 1.134160 5.7804800 0.770270 -1.164310 -5.490850
## 5 0.311861 2.239030 2.096780 -3.4352900 5.097890 -3.305730 1.374780
## 6 -0.633924 -0.160888 -0.692334 -1.4510800 -0.791833 -0.747096 -3.763220
## PC424 PC425 PC426 PC427 PC428 PC429 PC430
## 1 -1.059930 1.189740 7.897540 -1.28273000 3.842900 -2.966280 0.098208
## 2 4.908080 -7.822090 -3.670350 0.00254577 -5.044020 -1.718650 2.294610
## 3 -4.824760 0.765343 -2.504620 2.69992000 4.290130 -2.310330 -3.080490
## 4 0.409329 3.696870 -5.035110 -9.54874000 -4.796550 -3.665010 4.221900
## 5 0.175582 -3.201250 -0.861944 3.02739000 0.143382 -1.756010 5.042660
## 6 -1.932680 2.291090 0.951139 1.05760000 -1.540200 0.810951 2.420680
## PC431 PC432 PC433 PC434 PC435 PC436 PC437
## 1 -6.697050 1.685660 -0.243431 -2.448690 3.431410 4.924440 -0.557961
## 2 2.249520 1.899320 -0.886325 -2.881820 4.404680 3.758520 2.652000
## 3 0.561962 -0.963899 -3.753970 0.255719 4.539240 -1.637550 1.301970
## 4 -7.683000 -5.204160 2.251000 2.005980 -11.448100 -4.724820 -3.365900
## 5 0.568323 1.611650 4.376980 1.930420 -7.120900 -3.577540 0.190141
## 6 2.721070 -0.976344 -0.615873 0.295888 -0.452162 -0.825643 0.570102
## PC438 PC439 PC440 PC441 PC442 PC443 PC444
## 1 2.027550 -1.307050 6.075990 2.392040 -5.98478 3.875640 -1.5105900
## 2 1.606860 0.106339 1.904550 1.165110 -3.41790 5.343510 -2.1010700
## 3 -0.804997 -0.222015 0.398814 -2.446710 -3.29946 0.474361 -0.1870860
## 4 1.322950 2.981560 -0.178201 0.445028 -3.96408 1.481280 -6.1553900
## 5 1.096330 1.982040 -3.747420 2.849590 1.18340 -2.205130 6.1859400
## 6 -1.661220 0.460001 -4.163780 1.734380 -1.60864 1.425830 0.0177583
## PC445 PC446 PC447 PC448 PC449 PC450 PC451
## 1 0.0543639 -3.483880 -1.758580 5.8307800 -12.800700 8.665420 2.57629000
## 2 0.4838180 0.842091 -1.087880 8.0486200 4.420650 -5.093200 0.72606300
## 3 4.9778000 3.322760 -4.199770 3.2852100 5.431240 -0.278039 3.61754000
## 4 1.0217200 -2.943360 -1.568410 3.5185400 3.588530 1.165150 1.37150000
## 5 -0.2619590 2.436830 -2.134500 -6.2773600 -0.162487 -7.516650 0.00819288
## 6 -0.5146880 2.233280 -0.798793 -0.0443748 0.574122 -2.278650 -2.66079000
## PC452 PC453 PC454 PC455 PC456 PC457 PC458
## 1 -2.663170 1.336180 -3.876230 0.128933 1.647270 -2.4688800 -0.747510
## 2 0.464246 -0.880251 4.770560 1.805550 9.946890 -0.2074640 0.724752
## 3 -3.159810 1.534980 -1.749210 -2.494610 -1.520250 0.9077070 -4.305340
## 4 0.709957 5.805050 1.884610 1.677390 -0.649724 0.5085680 -2.370760
## 5 2.752160 -0.550547 -5.337650 0.399748 -5.600140 0.0219237 4.681990
## 6 1.325480 0.154809 -0.385594 -0.776716 -0.788787 2.1430000 -1.536190
## PC459 PC460 PC461 PC462 PC463 PC464 PC465
## 1 2.270570 -5.4933200 6.461500 -3.33142 3.142090 -12.198700 3.91248
## 2 2.549500 4.7613800 -3.256860 4.54163 2.767550 1.899950 -3.89990
## 3 -1.951570 0.0337128 6.954090 5.37897 0.262643 -2.775460 8.30044
## 4 0.219423 -1.0728000 6.479310 4.85522 -2.139290 -0.349628 4.27176
## 5 10.069900 2.1485000 0.896402 -4.57293 2.335340 -2.821520 -4.37676
## 6 -0.524228 -0.5470630 2.378640 2.67134 -0.211221 1.420820 1.73797
## PC466 PC467 PC468 PC469 PC470 PC471 PC472
## 1 3.628020 -4.0244400 -8.1711400 0.310744 -0.8074060 2.912020 -2.53329
## 2 0.418647 10.4508000 -1.0979000 -1.964110 -0.8672260 -4.281280 0.76241
## 3 6.141740 -5.1453900 8.6116400 -5.823570 3.9905600 0.883324 2.46051
## 4 -2.623500 -6.0081400 2.5242500 7.108670 -3.1207300 -4.661080 -1.30211
## 5 -1.183000 0.0101678 -0.0661312 -1.290870 5.4439800 -6.449400 2.27396
## 6 1.824960 -1.2615100 -0.2402270 1.131610 0.0380535 -0.192693 1.20366
## PC473 PC474 PC475 PC476 PC477 PC478 PC479 PC480
## 1 1.013180 1.6745700 2.96890 -5.48375 -6.103410 -4.61101 -2.322810 0.412452
## 2 2.673770 0.0147722 -4.50127 2.04992 3.102240 2.99153 7.120000 0.830942
## 3 3.358100 -3.4148800 1.81465 3.03073 -0.987833 -2.96006 0.594713 0.195196
## 4 1.966060 -3.9964500 -2.79979 -1.95266 0.727856 1.33037 -2.553200 5.371020
## 5 4.215990 1.6974900 -2.43417 -1.75712 1.000220 7.04984 -0.121104 0.675001
## 6 0.760275 0.5023750 -1.60718 2.58132 -1.260870 -1.52995 2.888590 -0.886037
## PC481 PC482 PC483 PC484 PC485 PC486 PC487
## 1 3.554580 -1.461740 0.0914599 -3.2171800 0.655798 3.843490 2.011310
## 2 7.372670 -5.650640 3.4037300 -0.0880227 4.858760 3.624130 -1.305790
## 3 -2.255540 3.775280 1.3882900 -1.7515100 -0.668557 -3.536550 -1.459290
## 4 1.432980 5.346770 -2.6040100 1.6998900 -1.660010 6.209710 -0.313448
## 5 0.450003 -2.554510 -0.2082930 -1.0672200 5.025500 0.353257 0.968105
## 6 3.231290 -0.392056 1.5970900 -0.1756650 -1.125020 1.700380 0.606852
## PC488 PC489 PC490 PC491 PC492 PC493 PC494
## 1 3.802490 -4.42576 -2.72845 7.621920 -4.325580 -0.3197550 3.667250
## 2 -0.273779 4.25418 -2.62627 -8.714900 2.932970 0.0685562 0.559071
## 3 1.703190 2.56347 1.85937 -0.324487 1.980840 1.2350700 -6.113290
## 4 -1.095370 -2.46304 -2.46801 2.913660 -3.140380 0.4899490 -5.079530
## 5 -2.027000 1.03381 3.38786 -5.432900 1.385940 -2.4002500 -3.183000
## 6 -0.680082 2.57602 -1.26008 -2.528290 0.896173 0.3928170 -1.163320
## PC495 PC496 PC497 PC498 PC499 PC500 PC501
## 1 2.152860 5.7872700 -6.75241 4.888950 10.529200 0.598650 5.330280
## 2 -0.160883 -2.1484100 4.78026 -5.771790 -3.471150 0.939116 -2.290020
## 3 6.756230 -0.0277912 1.17952 6.194580 -0.993496 0.656219 4.909960
## 4 4.118470 -1.4353600 2.49865 3.763830 -2.638930 1.726980 -4.746170
## 5 -4.630090 -0.3568730 -1.52424 0.184014 7.265410 5.768670 -0.799854
## 6 -1.031990 0.3377750 2.11723 1.698640 2.682820 -1.489850 2.085970
## PC502 PC503 PC504 PC505 PC506 PC507 PC508 PC509
## 1 1.147590 -6.18349 2.441420 2.589680 0.945649 -0.938806 5.70242 -1.472610
## 2 0.584227 -3.77973 -0.269226 5.746340 -2.132790 -6.135870 -3.78667 -1.808490
## 3 -1.617680 3.19440 -0.131290 -3.426400 2.990390 -1.383630 -1.41255 -1.592690
## 4 -2.249930 6.06621 -5.229590 -3.874520 -0.932163 -4.240970 -1.71753 5.178590
## 5 4.279080 -2.11966 -0.039880 2.324150 -6.198930 0.201677 -1.85836 3.175350
## 6 -0.180856 2.28419 0.209238 -0.384053 0.195751 1.556930 1.70532 -0.573656
## PC510 PC511 PC512 PC513 PC514 PC515 PC516 PC517
## 1 2.49245 -0.170717 -0.65352 -1.421020 -3.11407 -1.79039 -2.01058 0.322580
## 2 -4.53870 6.674060 5.31818 5.727520 -3.67606 -0.36751 -3.44218 -1.265880
## 3 -1.75669 -0.131597 -2.17060 -2.524800 -2.97285 -2.59630 3.07011 3.589120
## 4 7.61641 -0.945926 -7.50184 -0.244245 1.73440 -6.08824 2.38231 1.054690
## 5 1.51179 -4.933460 1.81896 5.225040 3.30415 3.05736 1.43495 -2.695320
## 6 1.69116 -2.429450 1.22643 0.605065 -0.58304 1.19657 1.36024 -0.223375
## PC518 PC519 PC520 PC521 PC522 PC523 PC524
## 1 -0.563165 -2.154070 -0.845365 -0.6483500 1.83221 2.88531 -1.725740
## 2 0.301268 -3.229540 0.873406 1.7891300 -5.46300 7.75819 2.325860
## 3 -0.767775 -0.220158 -4.583340 -3.5858600 -1.04207 -5.68721 -0.233143
## 4 1.791160 1.734350 -2.313860 1.5824700 3.74753 -1.16347 0.521510
## 5 -1.746250 -1.497070 0.906723 2.1055100 -2.84490 -2.15630 0.213521
## 6 -1.645620 -0.315717 -1.094820 -0.0148446 -1.29688 2.53103 0.295172
## PC525 PC526 PC527 PC528 PC529 PC530 PC531
## 1 -2.1186300 1.580550 -0.393534 -3.340450 0.7839050 -4.66696 -3.14469
## 2 -0.0507085 0.426393 -0.383365 -4.232440 2.2558000 3.93817 2.57636
## 3 -1.5023100 -1.351400 -0.664369 0.516294 -0.9451710 3.92352 -1.64968
## 4 0.7228330 -5.733700 7.168400 4.020570 -3.8497600 2.84411 1.62315
## 5 -1.2113900 5.624170 -4.654230 1.694480 -1.3925100 1.42230 -2.14384
## 6 0.0801718 -0.170995 0.501421 0.400535 -0.0388899 -0.12602 -0.49322
## PC532 PC533 PC534 PC535 PC536 PC537 PC538
## 1 3.405020 -1.023090 6.971360 3.084210 0.583564 -1.490980 -5.154550
## 2 -6.307960 1.694060 -6.513580 -1.381010 -7.136960 -2.431020 -2.209950
## 3 0.736933 -0.884715 -6.036790 -11.649600 7.088760 2.491950 3.449040
## 4 -1.267600 -1.954300 -1.052460 3.190740 1.285960 -0.221289 0.805661
## 5 2.102560 -1.143910 0.266658 -2.888100 2.042110 -0.211161 0.818581
## 6 -0.181220 1.334190 -1.171840 0.729099 -0.210794 0.856780 -0.167919
## PC539 PC540 PC541 PC542 PC543 PC544 PC545
## 1 -2.207470 0.758238 -3.335490 5.4590000 1.221180 1.63972000 4.427280
## 2 -0.911507 -1.389160 3.149200 -3.8821200 -3.431940 -6.08617000 -4.603970
## 3 3.543010 -1.516680 1.850980 -1.5179400 -0.407301 1.66806000 -7.206650
## 4 -2.818540 -1.144380 -2.341230 -7.4721300 -2.596550 2.33664000 -3.276970
## 5 -1.459910 -2.492320 1.155440 0.0787301 3.107540 -0.00789298 0.538279
## 6 0.139411 0.726248 0.900952 2.1527200 -1.238480 0.32846000 0.776762
## PC546 PC547 PC548 PC549 PC550 PC551 PC552
## 1 -0.592321 -3.950480 5.398870 -0.259369 1.346980 -3.0258500 2.398070
## 2 0.868614 -0.776384 -3.824660 2.338640 0.631417 -1.0236800 3.124520
## 3 4.242580 2.912100 -2.691830 -5.502290 1.836520 -1.5376700 -7.697860
## 4 0.600199 4.452940 -1.611770 -0.782176 1.684540 0.0141803 -2.459820
## 5 -0.293807 3.026430 -6.600290 -1.351960 0.712053 0.9871010 -0.721727
## 6 2.176100 -0.281714 -0.211135 1.887900 0.134043 -1.3467200 -1.413170
## PC553 PC554 PC555 PC556 PC557 PC558 PC559
## 1 -0.286456 -0.0663779 3.476500 -1.351740 -1.64359000 -0.778570 -1.306260
## 2 1.589360 -1.7513600 -3.728780 -0.730756 -0.00663453 -0.484378 1.384330
## 3 -7.170830 2.0107000 -9.569970 3.974570 3.56960000 8.585820 1.739970
## 4 -6.107070 1.8487100 2.812830 3.488180 0.66171100 -1.753540 -2.556800
## 5 -0.694254 0.4203220 -6.285140 1.427040 2.70552000 0.324602 -0.267143
## 6 -1.441920 -0.3393800 -0.212959 1.152500 -0.77473400 0.450112 -1.014180
## PC560 PC561 PC562 PC563 PC564 PC565 PC566
## 1 2.825240 0.0995965 1.57620 -3.60296 0.670159 -3.209690 1.550600
## 2 -2.489070 -4.0515200 -3.70281 6.60162 -2.458920 6.003580 -7.124020
## 3 -1.748750 2.9037700 2.62248 -6.01691 1.100880 -4.439560 5.372880
## 4 -4.708210 2.8882200 -1.32227 2.67467 2.624540 0.743727 2.569070
## 5 0.653901 -0.5110950 1.18163 -2.79985 -1.797680 0.198578 -3.013150
## 6 -1.833120 -1.2206000 -1.99551 1.19657 0.685755 0.234363 -0.634971
## PC567 PC568 PC569 PC570 PC571 PC572 PC573
## 1 0.989093 -0.459252 0.975044 2.73825 1.134910 -2.30975 0.2222010
## 2 -0.929648 -3.040190 0.822756 -1.69767 -1.093300 -2.08528 -0.0283054
## 3 1.996950 6.582050 0.692111 -2.92630 1.409960 6.58744 -1.6235900
## 4 0.980353 -3.243020 5.102860 -2.73461 0.253183 3.93572 -0.1543540
## 5 -1.986100 4.835600 -1.979180 -2.03141 0.057754 -4.35120 -0.0373671
## 6 -0.800821 -0.265115 -1.446920 -1.63397 -0.565600 2.08011 -0.6737900
## PC574 PC575 PC576 PC577 PC578 PC579 PC580
## 1 1.188210 0.870558 -2.113010 1.37526 0.000623485 2.240530 -0.459305
## 2 -3.579570 3.242020 0.369332 -3.67347 -2.681540000 -3.973060 0.369044
## 3 0.950265 -0.902675 -3.054820 1.31367 2.586150000 0.692345 1.026450
## 4 2.626630 -0.128662 6.096140 1.26551 0.796442000 -0.817054 -1.224270
## 5 -0.530347 -0.572138 -1.720590 1.32734 -0.445303000 1.693790 0.569703
## 6 -0.331061 -0.566017 0.365110 -0.71432 -0.226785000 0.743208 1.396390
## PC581 PC582 PC583 PC584 PC585 PC586 PC587
## 1 0.900178 -0.975244 1.351650 -0.764878 0.411443 -3.608420 0.816712
## 2 0.230988 -0.649956 -1.625020 3.628170 -3.325320 1.478650 1.625470
## 3 -3.870950 3.023630 2.915330 -1.153350 2.061790 0.483450 -5.288950
## 4 -0.117613 1.489090 2.486810 -0.658991 -0.150000 -1.361030 -2.080040
## 5 2.659190 2.698140 0.107061 0.738584 0.576776 2.061340 -0.933789
## 6 -0.741153 -0.478477 -1.091250 0.432916 0.536083 -0.676711 -0.147207
## PC588 PC589 PC590 PC591 PC592 PC593 PC594
## 1 0.0384876 -2.219890 -0.408084 -1.612930 -1.4117400 -0.2201720 1.179640
## 2 2.5240200 1.151330 -0.354449 -0.381923 -0.5873530 0.6151750 1.015280
## 3 -7.6461100 0.354336 2.357310 5.889990 0.2842930 1.6530400 -0.253811
## 4 -1.0604400 -0.817802 2.861370 4.752060 -1.3637300 -0.4553330 0.113574
## 5 -0.3504120 -1.044770 -0.397141 0.717417 -1.2142500 0.0818215 0.682315
## 6 -0.7484500 0.578017 1.178260 1.426280 -0.0826511 -1.7031100 -0.406109
## PC595 PC596 PC597 PC598 PC599 PC600 PC601
## 1 -0.0430501 -0.664985 -0.157422 0.0947256 2.419170 0.6925310 0.0561172
## 2 -2.9241400 1.393060 1.208010 -0.6697120 -1.009420 0.6709490 -1.2857100
## 3 3.9703000 2.300020 -3.190190 1.5798000 0.306442 -3.5776800 -0.9908110
## 4 -0.7069660 -2.730620 -1.374650 0.4564930 -1.200290 -1.1788900 1.8492300
## 5 1.2877000 -2.035230 -1.534000 -0.2660950 1.271710 -0.0783648 -3.0542800
## 6 2.1880700 1.462530 -1.806420 0.0631933 -0.662056 1.1882700 -0.9711610
## PC602 PC603 PC604 PC605 PC606 PC607 PC608
## 1 -0.422698 1.2217300 1.1282100 -0.0968164 0.883596 -1.851110 0.2777940
## 2 2.119190 -0.2993870 1.1771100 3.2232000 1.750800 -0.367609 -0.0249427
## 3 -4.576820 -0.8257750 1.2293200 -3.1137200 -0.268914 3.724350 -1.6254500
## 4 0.316105 -0.5618170 -0.1933880 -0.6756260 0.588500 1.240880 0.6906870
## 5 -2.893920 -0.8409080 0.0943487 -1.0693300 0.974841 -0.101900 -1.5808500
## 6 0.217015 -0.0314555 1.2089000 -0.0546657 -0.414050 -1.478620 0.2828550
## PC609 PC610 PC611 PC612 PC613 PC614 PC615
## 1 0.9498830 0.482229 0.0376125 0.1797420 1.8884100 -1.172590 0.0356375
## 2 0.1552650 -0.364791 -1.1793100 -0.9907910 -0.4088200 0.901477 0.1978340
## 3 -1.4347400 2.588180 2.9292400 -0.8670590 -0.0932232 1.349220 -1.0828100
## 4 0.0549793 -0.170540 -1.0536100 0.1080440 -2.1936100 3.458960 1.8271800
## 5 -0.8816780 0.193019 0.1477220 0.0441914 1.2076800 0.291053 -0.6286800
## 6 -1.0314900 1.159970 -0.5925440 0.4074700 -0.9531180 -0.382572 -0.0261295
## PC616 PC617 PC618 PC619 PC620 PC621 PC622
## 1 -1.2742200 -1.0166500 1.017700 0.413055 -0.615057 0.933675 0.561719
## 2 0.0792599 -0.2762350 0.377416 -1.354380 -1.201900 -0.260239 -0.613003
## 3 0.9567240 1.8708200 1.603800 2.505120 0.409465 -1.091240 -1.182830
## 4 0.7979200 1.3687000 -2.398750 2.090690 -1.684470 0.237746 1.055300
## 5 0.0248111 0.7346960 -0.984834 -0.284761 0.104881 -0.605211 1.480840
## 6 1.2182000 -0.0674913 -0.194666 -0.839569 -0.573405 0.038825 0.137123
## PC623 PC624 PC625 PC626 PC627 PC628 PC629
## 1 0.1222350 -1.655880 -1.0240800 -0.398773 -0.372284 0.479921 -0.0538361
## 2 0.7523290 1.770930 0.9077600 -0.342363 -1.178260 -1.118970 -0.8612290
## 3 0.4812560 0.754075 -1.5656300 0.225430 0.780495 0.447120 -0.1822280
## 4 1.0049600 -0.483412 1.6803900 -0.427996 0.511488 0.817202 -0.3105760
## 5 0.9491250 0.715926 -0.3017880 1.305980 0.541594 1.282800 0.2009910
## 6 -0.0150364 -1.275430 -0.0555473 -0.887926 0.135793 0.881540 1.3025200
## PC630 PC631 PC632 PC633 PC634 PC635 PC636
## 1 -0.6730140 1.329650 0.654511 -0.4903520 -0.3914520 -0.3719210 -0.9230830
## 2 0.0548119 -1.407510 -0.869820 1.1010700 -1.0294800 0.5386230 0.5480640
## 3 0.5820030 0.347856 0.200176 -0.3495600 1.3099300 -0.0265796 -0.5817040
## 4 -0.1259030 -1.563200 -0.721731 0.1302550 0.6358970 0.2288740 -1.1053800
## 5 0.5597680 0.776038 -0.203955 1.5193500 -0.0816816 -1.5026400 0.0457431
## 6 0.5599610 -0.573395 0.917824 -0.0270427 1.3210200 0.6686640 -2.0031600
## PC637 PC638 PC639 PC640 PC641 PC642 PC643
## 1 -0.545903 -0.2611980 0.663981 0.148466 -1.352060 0.534462 -0.0320718
## 2 1.160670 -0.6517060 -1.015110 -0.414513 0.627800 -0.762995 0.5611680
## 3 -0.377351 1.2528800 1.005600 -0.662310 0.233966 -0.474998 0.5785380
## 4 -0.285090 -0.0676641 -0.187437 -2.525190 0.409769 -1.207560 -0.3914100
## 5 1.077120 -0.3668680 -0.679117 0.429045 0.409687 0.160206 0.1937180
## 6 -0.929517 -1.1370400 -1.731310 0.106299 -2.042490 0.202262 -1.0385800
## PC644 PC645 PC646 PC647 PC648 PC649 PC650
## 1 0.695318 -0.5732730 1.419190 -0.376348 0.7357110 -0.8545110 -0.3704340
## 2 0.425374 -0.7022600 -0.923579 0.105626 0.2854380 -0.2521310 0.9802470
## 3 -0.291940 0.6470010 -0.476133 -0.762751 -1.1635700 -0.0828889 -0.0932712
## 4 1.453290 0.0768286 -1.272050 -0.611429 0.4130000 0.2926600 -0.2423210
## 5 0.215671 -0.0764362 0.256408 0.724395 0.0163036 -1.2054800 0.2619800
## 6 -0.760264 -2.0082000 2.892230 2.303850 -2.2543100 -0.8165350 -0.6895360
## PC651 PC652 PC653 PC654 PC655 PC656 PC657
## 1 -0.29158400 -0.956123 -0.972026 -0.877605 -1.1494000 0.829165 -0.0530470
## 2 -0.00710967 1.328450 -1.379670 0.160955 0.2285950 -0.466127 0.0432664
## 3 0.50577700 -0.142748 0.781087 0.590896 1.3880000 -0.124747 0.3939730
## 4 0.89601900 0.405259 0.029616 0.808107 0.0401254 -0.356069 -0.0146783
## 5 -0.26726200 -0.468042 0.183892 1.217820 0.2050710 -0.709634 -0.9518690
## 6 1.56279000 -1.084490 -1.147930 -3.845710 3.3993000 3.065770 1.6460700
## PC658 PC659 PC660 PC661 PC662 PC663 PC664
## 1 0.00773703 -0.5788180 0.4602070 0.0772628 -0.302533 -0.457385 -0.0492801
## 2 0.58495000 0.6189400 -0.1186510 0.5637610 -0.520939 0.256424 -1.5277700
## 3 0.23331200 -0.3534350 0.6300210 0.1764910 0.155248 -0.325883 0.2595370
## 4 -0.17203300 -0.0365733 -0.1475710 -0.2947840 0.147316 -0.172945 0.7324470
## 5 -0.61314100 0.0015057 -0.1592740 0.3222340 0.147858 0.313532 0.1500250
## 6 1.63719000 1.7095000 0.0199371 -3.7314300 8.424690 -6.823410 0.6166360
## PC665 PC666 PC667 PC668 PC669 PC670 PC671
## 1 -0.0482858 -1.071640 0.307679 0.517415 -0.247526 -0.2724220 0.0570314
## 2 0.4070540 0.978048 0.505680 0.441660 1.310400 0.2838230 0.3236680
## 3 0.1722620 0.860721 0.116949 0.344689 0.648392 0.0017265 0.1882100
## 4 0.0770676 -0.702335 -0.113037 -0.185621 -0.649254 0.1925390 0.0629604
## 5 -0.1966550 -0.104446 0.168875 0.373712 0.356984 0.2672210 -0.2675250
## 6 2.2520500 -9.838060 -10.543700 -2.177630 -18.570900 -4.9561400 -10.5035000
## PC672 PC673 PC674 PC675 PC676 PC677 PC678
## 1 0.3658390 -0.2220490 -0.4542430 -0.1059320 0.0668257 -0.1227860 -0.07169060
## 2 -0.3572560 0.3813790 -0.0584039 0.3682240 -0.0438910 -0.2437420 -0.62563900
## 3 0.0525116 1.1491300 -0.0821967 0.0657877 -0.3136410 1.0217800 -0.12046900
## 4 0.4529030 0.1608160 0.0197362 -0.6737840 -0.4331220 0.0589937 0.28747700
## 5 -0.3019860 0.1766060 0.2178910 0.6849810 0.3028740 -0.8538600 -0.00102422
## 6 1.3322600 0.0338846 -2.9810000 -1.6761600 1.4657700 -5.6600100 1.48465000
## PC679 PC680 PC681 PC682 PC683 PC684 PC685
## 1 -0.1012020 0.0386973 0.2632050 0.1172300 0.3892970 0.0890863 0.3201930
## 2 -0.1120630 -0.0705258 -0.0886319 -0.1414360 0.0814943 -0.0653639 0.1095060
## 3 0.0830344 0.2721590 -0.3359110 -0.3258950 -0.0744264 -0.1506270 -0.1918520
## 4 -0.1225620 0.1521610 -0.1445910 0.0501605 -0.2175600 -0.0702993 -0.0326227
## 5 0.3900610 -0.3917050 -0.4784030 0.1387840 -0.2413210 -0.1031130 0.1312310
## 6 -0.3688850 0.3400610 0.4379240 0.9850670 -0.5071560 -0.6654800 -0.7090970
## PC686 PC687 PC688 Individual region Pop_City
## 1 0.0385472 0.1949580 -6.61228e-07 801 Southern Europe Durres
## 2 -0.2263910 -0.0350247 -6.61228e-07 802 Southern Europe Durres
## 3 0.3104160 -0.3268830 -6.61228e-07 803 Southern Europe Durres
## 4 -0.1771030 0.0535150 -6.61228e-07 804 Southern Europe Durres
## 5 0.0731578 0.0622075 -6.61228e-07 805 Southern Europe Durres
## 6 0.6414640 0.0648584 -6.61228e-07 806 Southern Europe Durres
## Country Latitude Longitude Continent Year Region Subregion order
## 1 Albania 41.29704 19.50373 Europe 2018 Southern Europe East Europe 25
## 2 Albania 41.29704 19.50373 Europe 2018 Southern Europe East Europe 25
## 3 Albania 41.29704 19.50373 Europe 2018 Southern Europe East Europe 25
## 4 Albania 41.29704 19.50373 Europe 2018 Southern Europe East Europe 25
## 5 Albania 41.29704 19.50373 Europe 2018 Southern Europe East Europe 25
## 6 Albania 41.29704 19.50373 Europe 2018 Southern Europe East Europe 25
PCs 1 & 2
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc2_r01.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 and PC3
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
## [1] 73
Check the regions
## [1] "North America" "South America" "Western Europe" "Southern Europe"
## [5] "Eastern Europe" "East Asia" "South Asia" "Southeast Asia"
## [9] "West Africa" "East Africa" "Indian Ocean" "Central Africa"
## [13] "North Africa" "Caribbean"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 ALD 11.3046 3.85130 19.7833 1.687230 -31.9742 -7.03582 2.87450
## 2 ALD 11.3286 4.09234 21.6678 2.240930 -35.6509 -6.67780 1.30074
## 3 ALD 10.9877 2.96010 27.1215 1.722600 -27.0213 -8.93229 4.56509
## 4 ALD 12.5399 3.70106 23.5571 1.923170 -32.8265 -4.32701 -0.54051
## 5 ALD 10.6210 6.13863 18.5964 0.106578 -28.5055 -4.24365 -2.14243
## 6 ALD 10.3700 3.24124 23.0999 0.624742 -34.5679 -8.77115 -1.64540
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## 1 8.76051 -2.390600 -1.1133700 1.409550 -2.432570 2.21439 0.0831294
## 2 10.53680 0.572231 -0.0447769 5.148600 1.885100 6.72702 0.3359080
## 3 12.34000 2.083830 -2.5534900 5.418010 -3.295510 6.81957 -2.2182600
## 4 8.67068 3.930570 -1.8172500 -0.958173 3.130710 1.46826 3.9380100
## 5 6.97025 1.488870 -2.2270600 2.202370 -1.782360 -3.09520 -2.3885500
## 6 8.84329 -1.938790 -1.5921900 6.365010 0.288914 5.53801 -2.6206200
## PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 1.591980 -3.89981 0.846729 -1.512010 1.275050 5.20659 0.355472 6.59573
## 2 2.506320 -1.82679 4.219520 -2.663210 0.722739 6.76211 4.321160 5.56285
## 3 -0.176634 -3.29138 3.994920 -3.303390 -3.444090 4.72517 2.293660 1.81743
## 4 1.702520 1.34122 -0.943790 0.525896 -0.857543 -2.31975 -2.554890 -1.97683
## 5 -0.245045 -4.12400 2.278240 -4.605470 4.697470 1.25214 3.550810 7.89109
## 6 3.673660 -1.73236 1.017960 -2.620330 -0.748756 4.38833 -0.176724 5.28283
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 3.2310000 -3.782110 0.343321 3.452750 -0.397645 8.59238 -6.38829 2.67176
## 2 0.1545930 -0.973830 0.824802 5.002070 -1.803110 7.84339 -2.56195 2.51685
## 3 -0.3103470 0.104312 0.789867 5.413510 -3.317490 11.40880 -3.64877 3.30967
## 4 0.0302859 -0.871619 2.124920 5.503500 0.533652 -1.33150 -3.21115 2.07990
## 5 5.7531100 2.564260 -1.794590 -0.455631 -0.109703 12.61800 7.14574 -8.21520
## 6 2.0990500 -0.298802 -0.988705 7.396710 0.401363 5.97424 -9.19427 -3.69260
## PC31 PC32 PC33 PC34 PC35 PC36 PC37 PC38
## 1 6.86863 -1.089500 -4.44339 6.67840 -3.42491 -4.82034 8.93957 -4.88268
## 2 7.74545 2.853570 -3.56519 4.42324 -1.18201 -3.85741 4.39299 -6.69470
## 3 8.60304 6.357500 -7.46373 4.15771 -4.83301 -2.46457 7.54084 -8.38108
## 4 -4.69375 -0.207922 2.10898 2.65389 1.70503 5.73929 -4.27603 -3.66764
## 5 -6.60411 4.734720 -12.04430 4.07524 -3.22002 -2.26138 4.88504 -6.55756
## 6 4.72237 -0.926283 -3.27779 5.47676 -1.57894 -8.10418 7.07028 7.59312
## PC39 PC40 PC41 PC42 PC43 PC44 PC45 PC46
## 1 -6.78503 -4.42121 3.59483 -2.665010 -3.630720 15.46030 -3.96144 -6.4089000
## 2 -7.26412 -5.50867 7.42830 -2.154760 -0.773832 7.63320 -1.18746 -6.5392000
## 3 -5.62695 -5.01390 8.26299 1.203870 0.940659 1.56725 1.96505 0.5932230
## 4 -8.27641 -2.11220 7.04648 -2.259360 3.718930 1.64267 -7.67578 -0.5220810
## 5 -2.90173 0.32610 -3.06623 -0.994856 -8.462040 9.02437 1.05418 0.0444795
## 6 2.13255 8.45184 -12.27810 3.350630 0.377129 13.13020 -2.84936 -3.6098600
## PC47 PC48 PC49 PC50 PC51 PC52 PC53
## 1 -3.71097 -1.082220 -2.766790 7.672670 12.30180 -3.1054600 1.924130
## 2 -2.01765 1.189410 -5.822020 4.647830 4.41900 0.0153929 -0.433726
## 3 3.39743 0.471181 -5.686080 0.208139 1.09238 0.1296940 7.671860
## 4 7.34806 0.639311 -7.172720 -1.552730 1.99029 -1.8731700 -3.060680
## 5 -5.90926 4.810760 1.229390 1.441610 6.24433 -0.0726317 0.420500
## 6 -4.74439 8.944300 -0.487939 -3.048420 2.38544 0.9313740 -7.630770
## PC54 PC55 PC56 PC57 PC58 PC59 PC60
## 1 -0.683836 3.345480 17.363200 -1.485730 -0.0259309 1.56506 1.23083
## 2 -4.033930 2.998030 13.485500 -4.554240 2.5985500 3.97646 -2.31647
## 3 -3.867050 -0.788444 11.591500 -0.836219 2.9079800 4.70301 1.73630
## 4 4.549290 1.211220 -0.138582 -1.870110 -0.9434830 -12.62790 -5.61055
## 5 0.181434 4.545250 3.638140 -0.995323 -2.4282200 -3.61029 8.37504
## 6 -5.363230 -2.720780 -2.805080 5.570260 -0.5126940 8.06162 3.68012
## PC61 PC62 PC63 PC64 PC65 PC66 PC67
## 1 12.36460 8.14968 4.424100 0.675101 -4.946100 0.666866 -0.539953
## 2 7.21885 5.87628 2.239920 -5.329600 -4.809330 0.698679 -3.867710
## 3 10.40760 5.37401 5.918490 -2.324100 -5.898300 -4.353950 -3.038200
## 4 3.67099 2.43420 0.184676 2.806220 -6.231890 -3.045850 1.087490
## 5 4.05643 1.76671 6.028750 1.079830 -0.638482 -0.808931 -8.573490
## 6 -12.93130 -12.34680 -1.453650 -9.402500 25.534300 2.088730 3.960290
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 3.626120 3.15212 3.178980 -0.322655 8.34466 -0.0736454 3.20536
## 2 6.192960 -1.63845 5.300660 -6.972940 10.38190 -3.4281300 2.21707
## 3 0.790238 -1.59328 10.640200 -4.497410 7.29813 -3.0353400 5.61416
## 4 -0.308131 -3.43526 -4.851840 -4.036010 8.68017 -4.5135500 -4.00738
## 5 4.745270 6.12391 0.487623 -5.248270 2.22806 4.2850400 -1.88139
## 6 5.207100 4.08905 -9.177160 14.790700 -10.49410 -14.7545000 -1.28016
## PC75 PC76 PC77 PC78 PC79 PC80 PC81
## 1 1.560740 -1.1318000 3.079280 -2.294070 0.330213 -0.849971 0.23262
## 2 2.419960 -3.5366100 -2.952500 -0.300429 1.180380 -3.288850 2.46454
## 3 4.474900 -3.2787500 2.369970 -4.133800 6.488080 -0.821653 -1.46916
## 4 -2.897600 0.0746116 -6.816260 -1.939580 4.969250 -7.470980 1.76947
## 5 -0.452728 -3.6357600 -0.414089 -4.228540 2.659440 -3.243120 -6.26291
## 6 3.161660 7.8113200 21.747800 2.844890 -10.529600 -1.050340 -10.94520
## PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 1 3.676170 -2.114840 3.300130 9.77398 -0.501383 4.24391 2.821420
## 2 -2.625270 5.738870 7.397860 7.17553 -3.859310 4.76020 -4.966830
## 3 1.386140 4.294830 10.229400 1.45990 -6.178910 -2.86423 -3.815870
## 4 6.702490 2.631350 -0.272547 4.98237 -2.535640 2.31096 -3.023310
## 5 0.504438 0.151063 -0.428986 6.41210 4.387500 4.84532 -0.855451
## 6 -10.615300 -3.035320 -1.516010 -3.19946 -3.781570 -2.63837 -1.979470
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## 1 0.881157 -1.591860 5.38173 2.853400 2.381060 -1.304460 -5.221110
## 2 -4.828930 -4.081890 -1.09502 -0.126301 3.998150 5.205870 1.319620
## 3 -6.985200 -5.026990 -4.65174 -5.034190 0.994824 12.556600 4.545030
## 4 -5.524060 0.811997 1.02417 -6.209200 -0.934143 3.319320 -1.919300
## 5 -1.640050 1.103730 3.49897 -2.529840 2.613670 3.429880 -5.707880
## 6 -9.966890 -12.861300 5.42910 0.589571 2.157570 0.951197 -0.826276
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 1.82553 2.370550 1.8149700 -0.9901620 -2.541920 -0.679209 -3.58714
## 2 -2.39237 1.638400 4.4738900 2.6497300 1.116190 -1.692700 1.95229
## 3 -5.22245 10.461600 -0.8455530 0.0263679 0.505576 -7.371870 1.94227
## 4 3.89914 4.791710 1.3550500 1.6973700 -1.274580 5.993940 -5.88560
## 5 -2.82533 0.932514 5.2839700 7.5794400 3.857620 1.928860 1.70852
## 6 -4.32066 -0.115444 0.0519455 -3.6473500 0.416684 2.038670 3.52975
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 -1.242230 1.37588 -0.282432 1.009850 4.5056900 3.105340 -3.378790 3.24386
## 2 -0.388432 -4.76404 0.346802 -2.296200 -5.0945900 0.967458 4.496360 -1.54846
## 3 5.974930 4.55647 -1.570030 -0.563826 -0.1233150 2.944420 -1.330780 -6.44084
## 4 0.486571 -5.57529 1.298180 -5.440210 0.4837610 3.749750 0.621117 -6.45818
## 5 5.042380 -3.03734 12.192400 3.364350 0.0474162 1.490450 -3.737110 6.32788
## 6 -4.821870 -4.92392 -1.975290 2.803510 6.6819200 4.907960 3.215450 -5.80926
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 1.831410 -3.56423 -0.829310 -6.09407 -1.41875 3.763020 -1.236830
## 2 -0.123996 2.05858 2.182990 -5.22162 -2.11596 -0.160551 1.039900
## 3 -2.496910 -2.32711 5.238510 -2.03523 -2.69885 7.127050 0.156205
## 4 1.704910 10.35100 -0.742679 7.69977 -1.23606 -3.155350 11.828700
## 5 5.810880 7.21787 3.713050 -4.69280 3.66361 -2.726250 -5.317630
## 6 8.105620 -10.90690 -6.465960 -1.26854 -1.50106 -5.159560 8.907690
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## 1 -6.16092000 1.699850 0.186924 -4.391350 3.11780 -0.422223 4.95131
## 2 -2.08942000 -1.546120 3.111010 -0.949337 -1.59902 1.958100 1.83929
## 3 -0.34188400 0.749496 2.910160 1.304050 4.03203 -1.654230 7.92005
## 4 -2.17334000 -4.921240 1.410030 -7.839490 4.07305 -2.265450 -8.38001
## 5 0.00684361 -3.118290 5.235230 6.032060 7.83041 -1.207320 -0.43200
## 6 4.07670000 5.702270 -3.817560 6.174360 -4.08069 -2.418970 -0.97293
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## 1 0.185832 0.261899 -6.282550 5.91919 6.23795 7.0226900 2.535890
## 2 -0.580664 5.848520 -1.924010 8.06666 -2.68879 -1.5571300 0.298089
## 3 7.351420 6.280290 -0.995556 7.52393 -5.38622 -0.0920857 -5.297550
## 4 -5.969310 -6.378260 -6.014520 -4.39762 3.20391 7.1223200 4.967480
## 5 -10.860400 -6.738010 0.320248 3.82667 -1.24171 -3.7832800 5.202910
## 6 -1.569440 5.852960 -0.186841 3.44772 -1.20364 -0.4914640 -0.200503
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## 1 -7.091640 -4.09787 -0.9431710 3.730210 -0.179847 -2.858830 1.118670
## 2 -0.353264 -6.52274 0.0324326 3.545020 0.543437 -0.982884 -3.196480
## 3 1.967810 -6.04081 -2.9640000 5.207930 3.043190 -2.378460 -0.665147
## 4 -2.844440 1.84523 2.1313300 1.505590 6.049240 -3.552260 -10.050800
## 5 -3.937810 9.15740 -4.9289600 -0.265273 0.282665 -2.600760 -0.921349
## 6 -6.911200 1.92793 1.3839200 -0.159293 -4.199880 -3.750490 -2.533260
## PC139 PC140 PC141 PC142 PC143 PC144 PC145 PC146
## 1 4.263590 6.099980 3.22012 -8.334070 1.299490 7.02878 0.628793 1.854180
## 2 -0.703574 7.821730 -2.22026 -1.010210 1.234940 -3.78188 2.323930 -3.586940
## 3 -2.674500 9.179140 -1.15101 -2.484550 -7.111730 -1.52159 6.152850 -8.405590
## 4 -5.241910 3.592440 5.13412 0.565826 -1.087230 -4.42264 1.112270 -0.866679
## 5 -4.036760 0.360688 -2.64851 -1.479770 3.857360 6.19061 -1.356600 2.374540
## 6 5.005410 -0.217569 3.18881 -5.517830 0.507129 -2.31484 5.017880 1.484370
## PC147 PC148 PC149 PC150 PC151 PC152 PC153 PC154
## 1 -3.169590 3.90917 5.48849 5.5814600 2.969340 -1.10906 0.375404 -1.01594
## 2 0.941444 -1.69402 -1.08083 -0.8090390 5.699030 1.34852 -4.540270 -2.32031
## 3 -3.784180 -5.59783 -1.90505 -1.8694100 2.849310 5.91475 -3.382540 -2.34757
## 4 -3.553470 -3.55506 -2.91962 -0.0703231 0.377022 1.92900 1.305020 -7.54519
## 5 12.438800 6.40531 -1.03165 3.5773900 4.051250 0.97918 0.427298 6.82716
## 6 -7.824840 -2.85133 2.15418 5.0165100 -2.573520 -3.33185 1.175740 5.80516
## PC155 PC156 PC157 PC158 PC159 PC160 PC161 PC162
## 1 -3.194590 6.344740 -4.716410 5.56698 -0.229742 -1.12558 -2.78257 -1.1763900
## 2 -2.299270 4.739130 -3.311780 -3.87053 -5.494820 -1.99021 2.77757 -6.3429000
## 3 0.131539 -0.691950 2.945900 -9.90679 0.222635 -4.41852 4.96841 -0.5344060
## 4 6.693760 -0.412623 -1.045450 2.78292 0.401828 -1.29800 -2.12093 0.0229272
## 5 0.645226 6.898780 -0.239179 -1.87662 -7.712930 -8.70063 -7.45579 3.9954700
## 6 -8.188180 -0.502378 -0.606133 8.85862 -6.430770 3.95163 3.67135 4.9203100
## PC163 PC164 PC165 PC166 PC167 PC168 PC169
## 1 2.407920 0.504277 -6.207460 -1.821530 -1.46783000 -2.2010600 0.284804
## 2 0.160678 9.304710 -2.631630 -5.545960 -0.14424700 -5.9512900 4.902430
## 3 1.266740 0.489838 -7.443820 0.357423 -0.00106332 -4.8939300 0.550162
## 4 1.272300 -9.188160 5.111440 -3.451410 -3.36530000 -1.3892100 -3.437740
## 5 -5.355580 3.933350 0.553361 1.089580 4.15026000 -0.0174637 0.393706
## 6 2.439100 1.310280 3.313470 5.025000 3.10215000 1.9033900 1.265480
## PC170 PC171 PC172 PC173 PC174 PC175 PC176 PC177
## 1 -2.06408 0.249062 3.206130 2.51127 -1.57129 2.806100 2.328480 0.940548
## 2 -2.31194 -3.141630 0.382582 2.01567 5.37382 0.446184 0.977171 1.229680
## 3 -1.36361 -0.628543 -3.132420 5.85005 -1.13194 -4.795800 -0.509625 -8.482140
## 4 9.70610 0.776337 10.806700 -3.03668 3.68210 2.316360 -1.512890 1.518280
## 5 2.00749 10.847100 4.886560 -2.65903 -5.96778 -5.692680 4.891790 -5.440430
## 6 3.31191 -1.190150 3.899690 -2.94469 -2.81133 -2.711750 -2.510280 3.611460
## PC178 PC179 PC180 PC181 PC182 PC183 PC184
## 1 -0.00200687 -4.65122 2.3894400 -7.303210 -2.759210 2.754250 5.157660
## 2 -0.74188400 4.72168 -1.3900200 -8.020900 -0.766446 0.408351 3.968530
## 3 1.17209000 4.64376 -4.5449300 -1.867810 -5.384910 7.567070 -7.286060
## 4 8.23867000 -7.06576 2.5939400 -2.129160 1.646700 -3.702110 -0.560026
## 5 -3.79358000 6.30434 -3.0662400 4.405560 -3.986570 -2.528300 3.393280
## 6 -0.15949300 0.36457 -0.0979605 -0.803125 -3.550490 1.204050 5.197190
## PC185 PC186 PC187 PC188 PC189 PC190 PC191
## 1 -5.122260 -2.875810 1.66973 -2.690040 -2.36216 -7.727470 3.27333
## 2 -5.199090 -1.588930 4.85938 5.565460 2.97993 -1.850640 -1.77663
## 3 3.780100 0.199628 1.42551 0.608429 -1.16493 1.421940 -0.28420
## 4 -11.393300 -8.025690 -3.60762 6.154500 -3.88123 -0.491429 4.93060
## 5 -0.683104 0.265958 -7.64458 0.818755 4.07498 -6.294480 -7.87804
## 6 3.152440 0.569911 -1.26830 -0.904214 -0.70014 -1.357030 -2.88737
## PC192 PC193 PC194 PC195 PC196 PC197 PC198
## 1 -10.970500 3.395710 2.02413 -1.57175 -3.547260 1.39563 -3.6125500
## 2 -1.240410 2.429520 -3.98737 -2.97395 1.090970 -1.02960 -0.0936227
## 3 2.198030 1.188300 1.26478 4.66170 0.524950 -2.12740 6.8943600
## 4 -0.355432 -0.614295 3.02340 -1.12641 -7.986670 -2.69594 -2.7320400
## 5 2.761850 3.336300 1.12680 -3.44725 0.689305 -1.93704 1.8109300
## 6 1.778270 0.410245 -0.27013 1.71417 -1.849740 -4.71803 0.0304301
## PC199 PC200 PC201 PC202 PC203 PC204 PC205
## 1 4.3506500 -0.1842840 -2.35063 2.435380 0.592889 -1.75159 -3.861130
## 2 -3.7102100 1.1534600 2.31505 3.295890 4.785610 -6.55696 -4.182160
## 3 -1.8342000 -0.0591787 1.43649 -1.125470 -1.176830 -5.18465 0.556054
## 4 5.9963000 4.2321100 1.46582 0.689867 -4.254750 6.18998 4.144820
## 5 -6.4662400 3.6972300 -3.45097 -5.285000 -5.222300 -5.51265 2.109890
## 6 0.0379097 1.2963200 -5.46168 -2.748510 7.776450 4.47502 4.427750
## PC206 PC207 PC208 PC209 PC210 PC211 PC212 PC213
## 1 1.29632 6.80115 3.724990 -5.679960 7.245660 1.430160 -3.92452 2.438540
## 2 -3.83494 1.33549 -3.521190 -1.570180 6.505040 -0.768251 -1.45241 2.828640
## 3 -5.16227 1.28890 1.088420 -2.014510 -1.789410 6.007320 -1.91646 -0.565721
## 4 0.99046 -1.30097 -2.315050 -5.949760 6.741770 -2.065300 2.48424 -1.596290
## 5 -1.61388 2.94504 -0.124489 -0.257321 11.537800 0.399082 -6.19094 -6.520800
## 6 1.93542 -4.99764 4.708590 -3.235450 0.620947 0.889299 -2.53503 0.928740
## PC214 PC215 PC216 PC217 PC218 PC219 PC220 PC221
## 1 4.028970 -4.886490 2.92810 5.481230 -3.43628 -8.39159 0.176600 -0.198451
## 2 -0.305079 -1.914920 -0.79662 -1.815350 -1.09437 -2.52244 -3.347810 -1.306930
## 3 -2.319340 1.040040 -1.62346 -3.834280 0.84389 1.46661 1.267470 1.828480
## 4 -0.255899 0.965714 7.46661 3.159170 5.80024 4.63414 -6.449860 1.305460
## 5 0.503550 7.551410 -2.29599 5.001530 4.75892 8.14829 4.804990 -2.737450
## 6 -3.560970 -0.402145 -2.13385 0.128978 -2.61625 4.07387 0.670273 0.929313
## PC222 PC223 PC224 PC225 PC226 PC227 PC228
## 1 3.137110 -5.921780 -3.046360 -1.67911 1.24058 -7.4721300 -0.48883500
## 2 5.360180 4.655510 -0.385307 -1.37559 -1.85719 -5.6091900 -0.26706700
## 3 -2.584560 -0.296809 5.357860 4.38550 -2.74253 -1.7924800 0.00585376
## 4 -0.462546 5.945220 1.681390 2.55162 -2.71198 -0.0619051 0.59627000
## 5 5.466080 -3.546580 8.015470 5.28112 1.35408 -0.1151420 -0.39073600
## 6 -3.499260 1.654170 -0.164668 4.23481 -1.47545 3.8106900 -0.67177800
## PC229 PC230 PC231 PC232 PC233 PC234 PC235 PC236
## 1 -0.484416 -6.591900 7.10378 1.20679 -0.953984 1.795180 -3.067130 -0.185464
## 2 5.944490 0.946733 -3.46878 -1.51164 0.568447 2.588420 -0.633609 6.386770
## 3 0.472092 2.519760 1.65108 -1.18434 -1.810870 2.063490 -0.997746 -1.331900
## 4 5.414530 -4.616250 -5.22295 3.60925 -1.318590 -5.395360 0.213695 -0.929498
## 5 1.747560 -3.011750 -3.00410 0.82996 0.798574 0.862193 2.458870 -4.114840
## 6 -1.438900 4.061510 -1.97774 6.04377 2.038440 -1.225040 -0.970757 2.346040
## PC237 PC238 PC239 PC240 PC241 PC242 PC243
## 1 -2.60627 1.724900 -3.233350 -1.8882900 -2.71458 -1.530650 -1.79883
## 2 -1.67193 -5.356220 -3.979290 -4.1170300 -2.96122 5.222450 4.06640
## 3 2.33019 -0.447266 -3.185630 0.0358097 8.08531 0.866774 -1.22115
## 4 -11.29090 6.735320 5.308680 -0.9669070 -3.06053 1.807940 4.24437
## 5 1.17821 3.636750 0.234708 -6.4126800 -5.04591 -1.722030 6.87473
## 6 -1.91458 -5.204140 2.415640 -1.8898000 -2.21064 -2.173030 -1.47902
## PC244 PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 -0.141661 -3.046470 -5.59169 2.374930 3.7825000 7.28198 -1.39082 -2.122820
## 2 -2.937560 -3.264400 1.04026 1.934550 -4.5539700 -3.65782 -2.21004 1.590630
## 3 -0.158989 0.424500 2.31887 -0.986673 -0.6495400 -4.43763 1.94750 -0.639691
## 4 -0.387815 -4.758520 -5.30704 -0.800842 -3.1182100 -1.80576 -4.37313 -1.730690
## 5 -0.443691 2.042780 -4.16313 -2.437480 -1.8216800 1.89389 -3.22134 0.903353
## 6 2.181070 -0.869642 1.58744 3.757840 0.0782487 -2.07095 -5.05006 1.444780
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 -3.1109100 -0.0989007 -3.984490 -0.380065 0.773690 5.081620 3.796560
## 2 -0.0900249 -3.1492800 0.945347 -0.316746 2.787110 4.558370 -0.148475
## 3 1.8735200 5.0584200 -0.611032 -1.920500 3.709800 -0.484981 -5.442160
## 4 -4.2312100 -7.2820700 0.406724 -0.858350 4.568340 -1.386100 2.992070
## 5 8.4939100 4.2626500 -0.487946 -0.449387 -1.182860 -0.322571 6.044300
## 6 -2.1517800 0.5775150 -0.378945 2.891990 0.450424 0.249205 0.817690
## PC259 PC260 PC261 PC262 PC263 PC264 PC265 PC266
## 1 4.554870 -0.180526 3.435920 0.225784 1.60653 3.76099 2.871990 -1.302680
## 2 1.288960 -3.907130 -1.382680 2.555630 -6.02580 1.71128 4.870660 7.403940
## 3 4.021820 -2.085190 4.568090 -1.944590 -2.48828 -3.21620 1.575480 -0.828938
## 4 -0.230136 3.909410 -3.996590 1.347380 3.39100 -2.32004 3.366520 -3.100750
## 5 -2.199270 -6.744820 -6.012390 -0.999186 3.85779 3.65402 -5.079180 0.360558
## 6 0.529356 -3.363430 -0.787424 0.286810 -1.05157 -1.30635 -0.712065 1.261690
## PC267 PC268 PC269 PC270 PC271 PC272 PC273
## 1 -0.704708 4.322350 -1.1258300 -1.179440 0.675343 3.01902 1.970360
## 2 5.160620 3.635520 -2.4468800 -0.493435 3.050280 -1.56081 -8.733250
## 3 -2.105030 -0.509731 -0.0109466 -1.482940 -2.779740 -6.08347 1.363990
## 4 4.917970 4.606890 3.2853500 -1.722580 -1.324390 1.72342 5.424210
## 5 -2.559020 1.384370 -2.3572400 1.784000 3.650560 -2.38102 -2.517820
## 6 2.473910 0.508406 2.7605000 -3.296760 0.279406 1.45649 0.701155
## PC274 PC275 PC276 PC277 PC278 PC279 PC280
## 1 1.076720 -6.456570 -4.3610100 2.232230 3.7653100 2.5232200 -2.577250
## 2 -2.348860 1.316240 -3.4896600 1.010140 3.5050800 1.3732900 1.907470
## 3 1.627800 -2.196950 5.0531400 3.953960 0.1296010 0.7704000 -1.831710
## 4 0.922302 2.941030 0.0113377 -2.077920 0.0954494 4.3804500 6.568490
## 5 -1.010390 0.577939 1.6901900 -1.417940 -0.2320480 2.0787400 3.394350
## 6 1.942790 -0.784017 1.2404900 0.799635 -1.3125100 -0.0911067 0.593806
## PC281 PC282 PC283 PC284 PC285 PC286 PC287 PC288
## 1 -1.167470 -7.32710 -1.757210 6.263120 6.68303 -2.738220 -1.282220 9.083170
## 2 6.050040 5.06460 1.867210 2.773310 3.04143 1.122050 2.125570 0.786617
## 3 -3.118240 2.85255 -1.304560 -1.839270 -3.27678 2.887690 4.543650 -4.966050
## 4 -0.755247 -2.41956 -0.177232 -0.785872 2.57769 -4.062370 -2.953830 0.331381
## 5 -0.167794 -5.00791 -0.104553 -1.051420 1.41791 -0.746397 -1.887730 0.908154
## 6 -0.419569 -2.21488 2.496790 -1.429140 -1.06806 -1.826130 0.880564 2.172490
## PC289 PC290 PC291 PC292 PC293 PC294 PC295
## 1 -2.24602 3.25972 3.101570 0.906306 3.789420 -0.418502 -0.835618
## 2 -1.97198 3.50750 0.107420 -1.740950 -1.272470 -2.388470 0.110258
## 3 3.27843 -6.82619 1.227160 -0.963813 -0.240709 0.520258 5.565670
## 4 4.31911 -2.41201 -0.813299 -2.981570 4.501630 2.012100 -0.720453
## 5 -2.20043 7.65197 -0.123663 0.949846 5.226230 -1.838180 5.166070
## 6 -1.55086 1.81220 2.017660 -0.907038 1.868770 -2.701710 3.248320
## PC296 PC297 PC298 PC299 PC300 PC301 PC302
## 1 4.8994000 -0.354399 -2.316600 1.198970 7.127490 -6.4063800 -9.698180
## 2 4.2534900 -1.765790 4.649910 -0.824233 -6.574470 -5.6218900 3.017050
## 3 1.0221600 1.172740 -0.620871 1.926090 1.182940 1.2398100 -1.562080
## 4 3.5535700 2.434290 2.813040 -0.250536 -2.751430 0.4080740 4.008350
## 5 -3.9412800 7.063680 1.152600 1.067250 0.957400 6.0361900 -5.040300
## 6 0.0479087 -0.334840 1.281160 -1.611780 0.510062 0.0803322 0.585129
## PC303 PC304 PC305 PC306 PC307 PC308 PC309
## 1 -1.244670 -0.0359111 3.494620 5.136460 -1.0652500 -5.897530 -2.196240
## 2 0.880336 -0.2842230 -5.833970 3.408550 2.8466200 -0.746355 3.100230
## 3 -3.920340 -2.6214400 5.745940 0.144089 -0.0390333 2.352300 0.797683
## 4 -3.766600 2.7632300 -2.127930 -1.375800 6.4100900 -4.232740 5.516660
## 5 2.002890 6.0349300 -2.486370 2.179430 9.5686900 -1.446080 -2.554080
## 6 2.285350 -0.0504606 -0.860408 1.275890 -0.3145560 0.263111 3.049160
## PC310 PC311 PC312 PC313 PC314 PC315 PC316
## 1 -4.529300 -0.596955 -6.022760 -1.98718 -3.854230 -4.458500 4.820600
## 2 -1.279880 -2.394470 -2.733360 2.29759 6.911940 2.783710 3.275130
## 3 1.875160 -0.843038 -3.966130 -3.25928 -0.861628 -1.571350 4.746350
## 4 -1.148870 -1.579430 2.904310 2.57539 -1.109820 -0.791526 -0.364149
## 5 -5.018320 1.121770 7.693330 -1.24061 0.587698 -0.272952 -1.099740
## 6 0.552704 1.666260 0.578342 -1.21995 1.913870 1.801810 -0.304175
## PC317 PC318 PC319 PC320 PC321 PC322 PC323
## 1 2.317450 -3.031970 -3.967150 -3.718840 -1.18691 3.754420 -2.247830
## 2 6.108240 0.961631 -1.638030 1.563570 2.16924 -3.449600 4.716840
## 3 1.737610 0.195350 4.031240 -1.256110 -4.96456 -0.360506 0.182904
## 4 -1.039470 0.732611 -1.700800 1.173080 4.33882 3.042740 -2.074310
## 5 -2.445130 -2.092750 -3.781580 7.753250 1.51262 -1.001810 0.216068
## 6 0.484572 0.124960 0.254165 -0.590667 -1.23423 1.284490 1.421910
## PC324 PC325 PC326 PC327 PC328 PC329 PC330 PC331
## 1 -2.407790 3.52872 3.019310 -5.046920 1.570320 3.542260 -4.17224 -0.764133
## 2 2.885370 1.60306 5.788690 -0.411278 -5.938110 -3.153700 1.52610 8.171960
## 3 3.217630 -2.05684 2.295780 2.350170 7.088650 -2.384220 -6.56339 3.529360
## 4 3.601730 3.30243 0.271778 -0.458454 -0.321689 -1.276820 10.04940 -5.558870
## 5 -1.756110 7.21950 -7.370730 0.722412 -1.418190 4.488210 5.81059 -1.090410
## 6 -0.140333 -2.03585 -0.620517 0.324934 2.739080 -0.184163 -1.30837 -1.856670
## PC332 PC333 PC334 PC335 PC336 PC337 PC338
## 1 2.403320 -3.4056100 3.337870 4.087050 -0.919898 1.374020 -0.956720
## 2 0.186044 -2.6618000 -0.863452 -0.624972 2.173400 1.361700 -3.670690
## 3 -2.177060 -0.5525700 -3.144710 2.214470 -2.243890 -1.964570 3.324350
## 4 2.962830 1.4213600 -1.762620 1.362520 4.203550 -1.478030 -0.558561
## 5 0.588064 6.0416500 5.217510 -3.452600 1.496190 0.221104 -5.046460
## 6 -0.592151 -0.0223541 -1.813180 1.348970 -0.975891 -1.191660 -2.384790
## PC339 PC340 PC341 PC342 PC343 PC344 PC345
## 1 -0.880226 -1.21509000 3.04682 -2.4142100 -5.935350 -1.983460 -4.654590
## 2 -0.605932 -0.00555504 3.36160 -2.1675600 1.394030 -1.566820 2.968850
## 3 4.231470 -4.78404000 3.28742 -0.0250219 0.753464 -4.882620 -1.118300
## 4 -2.447590 -4.44271000 -1.14934 8.4456400 2.939360 -0.727864 -1.694790
## 5 -2.852770 -1.32246000 1.09861 -0.1515270 6.350420 7.626440 0.222928
## 6 1.691910 2.79154000 -3.56493 4.8764800 1.760920 1.908600 0.218869
## PC346 PC347 PC348 PC349 PC350 PC351 PC352
## 1 -0.580142 -5.47046 -2.364440 1.107250 4.27404000 -0.412252 3.67231
## 2 1.987670 4.96409 11.246600 -1.562750 5.19936000 -0.521341 -2.80807
## 3 -4.398510 -1.68173 -0.847626 0.085437 0.46251200 -2.057560 2.52434
## 4 0.857038 2.08038 1.765370 -1.220690 -1.86166000 3.361190 1.92769
## 5 -2.248230 -3.16584 -7.307590 -0.262214 -5.46398000 1.412240 1.14520
## 6 2.221290 2.57069 1.317720 0.953769 -0.00655931 0.540784 1.76074
## PC353 PC354 PC355 PC356 PC357 PC358 PC359
## 1 1.871810 -6.511980 -0.661564 3.378200 -4.47432 0.941510 -2.664150
## 2 0.063551 -2.677530 -1.001650 -4.270190 -2.68414 -0.182748 0.708255
## 3 -1.812180 6.216740 3.139380 4.413630 -1.97647 5.228330 2.181090
## 4 -0.700273 -5.511250 -4.511440 1.528640 -2.73488 0.625332 -3.168350
## 5 -4.372240 -0.261217 0.172529 4.927270 1.26310 -3.894150 3.667970
## 6 -1.519310 4.241000 -1.619820 0.725635 -1.29752 -0.255093 -0.706519
## PC360 PC361 PC362 PC363 PC364 PC365 PC366
## 1 2.231740 4.099790 -8.543030 -1.588750 -1.157290 -3.5193500 -2.105260
## 2 1.397130 1.526930 -5.251060 5.153830 -5.638690 -4.5892300 7.809180
## 3 -5.802150 -0.321080 1.538390 -1.007440 -0.692839 2.2761400 3.638130
## 4 -0.785163 0.439312 -4.591540 6.905630 2.763150 -3.7990900 -4.649150
## 5 3.697240 1.214710 -0.744555 -0.815993 5.262620 2.7329000 -5.562210
## 6 -0.707137 -1.655810 0.165163 1.824010 0.922399 0.0216574 0.838715
## PC367 PC368 PC369 PC370 PC371 PC372 PC373
## 1 -1.6720900 4.487680 -4.314740 -3.528740 2.225810 -4.667440 1.028090
## 2 0.1706070 0.427944 -8.202190 1.707700 -0.317252 2.776320 4.323410
## 3 1.1027500 1.074630 0.550941 -1.084960 0.709983 -1.953010 2.460610
## 4 -1.0847500 -0.633538 2.947630 1.980230 -0.123283 3.328190 3.106650
## 5 4.8595200 -0.519760 -2.005060 3.654230 2.527530 0.186395 2.013940
## 6 -0.0637026 0.906861 -0.779790 -0.610078 -2.564690 -0.551038 0.510095
## PC374 PC375 PC376 PC377 PC378 PC379 PC380
## 1 7.02319 1.738220 6.596490 -0.8094320 -2.618450 -3.024410 -1.680290
## 2 -7.89084 -0.738554 -1.583800 -1.5258300 -3.943280 -1.357440 -6.709730
## 3 4.21677 -2.482110 -4.478720 -0.0608105 0.197405 0.299719 1.448730
## 4 -3.33134 2.823940 -4.081030 -2.2697700 -3.363470 -1.840450 4.161030
## 5 2.88541 0.485885 0.580940 -1.7494500 6.343700 2.654310 4.209810
## 6 -5.10864 1.730910 0.818387 -0.3118950 -0.834581 -0.560287 0.823605
## PC381 PC382 PC383 PC384 PC385 PC386 PC387
## 1 1.224140 -4.46823 4.560820 0.336982 3.222050 -0.128295 -3.128460
## 2 2.085980 -2.11413 -3.411630 6.556230 3.376970 0.324062 2.453470
## 3 4.953880 2.79445 2.917030 -2.244600 -0.713586 -0.365991 0.134078
## 4 0.742917 -3.21043 -0.491023 5.074570 0.285338 2.363070 -4.858370
## 5 -8.116330 4.33748 1.892020 -4.607370 -0.826007 2.118930 -1.398930
## 6 1.033700 -1.65589 -1.777830 -3.430340 -0.767556 1.481900 2.696470
## PC388 PC389 PC390 PC391 PC392 PC393 PC394
## 1 0.939928 0.933622 0.669828 2.5207300 2.17117 -2.860700 6.196000
## 2 -0.037260 1.490700 0.419819 0.1439770 -4.44610 -2.577600 -6.390070
## 3 -3.199150 -5.518490 3.181760 -2.8136800 1.66992 2.044430 -0.187200
## 4 1.497960 -0.326636 -3.596580 2.3397100 3.57688 1.227180 0.812500
## 5 6.972460 -1.416870 -3.069460 -2.8932200 -5.67699 -4.580810 2.630930
## 6 0.110215 -1.793140 0.266512 0.0441029 2.89785 -0.887878 -0.778045
## PC395 PC396 PC397 PC398 PC399 PC400 PC401
## 1 -2.982830 -3.082900 -1.915160 0.805651 0.620796 4.94050 2.0283400
## 2 -3.178690 3.264820 -0.390678 5.573150 -1.369010 2.10913 -1.7665300
## 3 4.499490 -1.544360 -0.836570 4.431710 -5.337080 -4.37793 0.2483010
## 4 5.035120 1.209630 0.238963 -3.609800 5.509770 -1.35605 0.7613310
## 5 -0.591322 -2.202240 -2.334450 -0.538381 0.351640 -9.57576 -0.0717029
## 6 0.426598 -0.399882 2.091850 2.806660 -0.307129 1.51197 0.2879930
## PC402 PC403 PC404 PC405 PC406 PC407 PC408 PC409
## 1 -4.114920 -1.97979 -0.808980 -1.91079 4.511750 -3.82357 -0.799932 -0.149239
## 2 -0.183011 2.31346 3.158090 -6.78478 2.630460 -6.62612 -7.284580 -1.642940
## 3 1.537450 5.09322 -0.578961 5.01701 -6.217660 2.83876 -1.389460 -3.389160
## 4 -2.623490 2.60056 0.689956 5.17085 -0.133193 -1.47182 -2.351500 2.767990
## 5 -2.331520 -6.66658 2.385440 8.46534 -7.742310 2.81319 3.648790 -2.183510
## 6 2.455320 1.00331 4.438930 1.72310 0.462323 -1.11461 -1.513510 0.549190
## PC410 PC411 PC412 PC413 PC414 PC415 PC416
## 1 1.180210 7.201860 5.997450 0.796744 6.778560 -6.71034 3.086720
## 2 -2.246620 3.939280 1.488150 -3.055610 -2.258990 6.62703 -2.355380
## 3 1.748630 -0.637145 1.767430 -1.559500 5.161070 -2.28945 -2.964570
## 4 -0.186485 2.244710 -4.674540 4.427060 3.618180 3.84153 2.612780
## 5 -6.562460 3.455780 -0.511899 -1.848520 0.333890 0.88684 -8.437590
## 6 1.467220 1.914170 -0.209509 -3.444480 0.877921 -1.44130 -0.203141
## PC417 PC418 PC419 PC420 PC421 PC422 PC423
## 1 -5.270000 2.669880 -2.737900 0.0972643 -1.743620 -1.627760 5.633320
## 2 5.311830 -1.166070 -5.028760 2.5614900 0.544232 5.968320 -0.237397
## 3 -1.070970 6.167360 -1.685960 0.2800240 -0.776307 -2.840230 3.627820
## 4 -1.219700 -7.430310 1.134160 5.7804800 0.770270 -1.164310 -5.490850
## 5 0.311861 2.239030 2.096780 -3.4352900 5.097890 -3.305730 1.374780
## 6 -0.633924 -0.160888 -0.692334 -1.4510800 -0.791833 -0.747096 -3.763220
## PC424 PC425 PC426 PC427 PC428 PC429 PC430
## 1 -1.059930 1.189740 7.897540 -1.28273000 3.842900 -2.966280 0.098208
## 2 4.908080 -7.822090 -3.670350 0.00254577 -5.044020 -1.718650 2.294610
## 3 -4.824760 0.765343 -2.504620 2.69992000 4.290130 -2.310330 -3.080490
## 4 0.409329 3.696870 -5.035110 -9.54874000 -4.796550 -3.665010 4.221900
## 5 0.175582 -3.201250 -0.861944 3.02739000 0.143382 -1.756010 5.042660
## 6 -1.932680 2.291090 0.951139 1.05760000 -1.540200 0.810951 2.420680
## PC431 PC432 PC433 PC434 PC435 PC436 PC437
## 1 -6.697050 1.685660 -0.243431 -2.448690 3.431410 4.924440 -0.557961
## 2 2.249520 1.899320 -0.886325 -2.881820 4.404680 3.758520 2.652000
## 3 0.561962 -0.963899 -3.753970 0.255719 4.539240 -1.637550 1.301970
## 4 -7.683000 -5.204160 2.251000 2.005980 -11.448100 -4.724820 -3.365900
## 5 0.568323 1.611650 4.376980 1.930420 -7.120900 -3.577540 0.190141
## 6 2.721070 -0.976344 -0.615873 0.295888 -0.452162 -0.825643 0.570102
## PC438 PC439 PC440 PC441 PC442 PC443 PC444
## 1 2.027550 -1.307050 6.075990 2.392040 -5.98478 3.875640 -1.5105900
## 2 1.606860 0.106339 1.904550 1.165110 -3.41790 5.343510 -2.1010700
## 3 -0.804997 -0.222015 0.398814 -2.446710 -3.29946 0.474361 -0.1870860
## 4 1.322950 2.981560 -0.178201 0.445028 -3.96408 1.481280 -6.1553900
## 5 1.096330 1.982040 -3.747420 2.849590 1.18340 -2.205130 6.1859400
## 6 -1.661220 0.460001 -4.163780 1.734380 -1.60864 1.425830 0.0177583
## PC445 PC446 PC447 PC448 PC449 PC450 PC451
## 1 0.0543639 -3.483880 -1.758580 5.8307800 -12.800700 8.665420 2.57629000
## 2 0.4838180 0.842091 -1.087880 8.0486200 4.420650 -5.093200 0.72606300
## 3 4.9778000 3.322760 -4.199770 3.2852100 5.431240 -0.278039 3.61754000
## 4 1.0217200 -2.943360 -1.568410 3.5185400 3.588530 1.165150 1.37150000
## 5 -0.2619590 2.436830 -2.134500 -6.2773600 -0.162487 -7.516650 0.00819288
## 6 -0.5146880 2.233280 -0.798793 -0.0443748 0.574122 -2.278650 -2.66079000
## PC452 PC453 PC454 PC455 PC456 PC457 PC458
## 1 -2.663170 1.336180 -3.876230 0.128933 1.647270 -2.4688800 -0.747510
## 2 0.464246 -0.880251 4.770560 1.805550 9.946890 -0.2074640 0.724752
## 3 -3.159810 1.534980 -1.749210 -2.494610 -1.520250 0.9077070 -4.305340
## 4 0.709957 5.805050 1.884610 1.677390 -0.649724 0.5085680 -2.370760
## 5 2.752160 -0.550547 -5.337650 0.399748 -5.600140 0.0219237 4.681990
## 6 1.325480 0.154809 -0.385594 -0.776716 -0.788787 2.1430000 -1.536190
## PC459 PC460 PC461 PC462 PC463 PC464 PC465
## 1 2.270570 -5.4933200 6.461500 -3.33142 3.142090 -12.198700 3.91248
## 2 2.549500 4.7613800 -3.256860 4.54163 2.767550 1.899950 -3.89990
## 3 -1.951570 0.0337128 6.954090 5.37897 0.262643 -2.775460 8.30044
## 4 0.219423 -1.0728000 6.479310 4.85522 -2.139290 -0.349628 4.27176
## 5 10.069900 2.1485000 0.896402 -4.57293 2.335340 -2.821520 -4.37676
## 6 -0.524228 -0.5470630 2.378640 2.67134 -0.211221 1.420820 1.73797
## PC466 PC467 PC468 PC469 PC470 PC471 PC472
## 1 3.628020 -4.0244400 -8.1711400 0.310744 -0.8074060 2.912020 -2.53329
## 2 0.418647 10.4508000 -1.0979000 -1.964110 -0.8672260 -4.281280 0.76241
## 3 6.141740 -5.1453900 8.6116400 -5.823570 3.9905600 0.883324 2.46051
## 4 -2.623500 -6.0081400 2.5242500 7.108670 -3.1207300 -4.661080 -1.30211
## 5 -1.183000 0.0101678 -0.0661312 -1.290870 5.4439800 -6.449400 2.27396
## 6 1.824960 -1.2615100 -0.2402270 1.131610 0.0380535 -0.192693 1.20366
## PC473 PC474 PC475 PC476 PC477 PC478 PC479 PC480
## 1 1.013180 1.6745700 2.96890 -5.48375 -6.103410 -4.61101 -2.322810 0.412452
## 2 2.673770 0.0147722 -4.50127 2.04992 3.102240 2.99153 7.120000 0.830942
## 3 3.358100 -3.4148800 1.81465 3.03073 -0.987833 -2.96006 0.594713 0.195196
## 4 1.966060 -3.9964500 -2.79979 -1.95266 0.727856 1.33037 -2.553200 5.371020
## 5 4.215990 1.6974900 -2.43417 -1.75712 1.000220 7.04984 -0.121104 0.675001
## 6 0.760275 0.5023750 -1.60718 2.58132 -1.260870 -1.52995 2.888590 -0.886037
## PC481 PC482 PC483 PC484 PC485 PC486 PC487
## 1 3.554580 -1.461740 0.0914599 -3.2171800 0.655798 3.843490 2.011310
## 2 7.372670 -5.650640 3.4037300 -0.0880227 4.858760 3.624130 -1.305790
## 3 -2.255540 3.775280 1.3882900 -1.7515100 -0.668557 -3.536550 -1.459290
## 4 1.432980 5.346770 -2.6040100 1.6998900 -1.660010 6.209710 -0.313448
## 5 0.450003 -2.554510 -0.2082930 -1.0672200 5.025500 0.353257 0.968105
## 6 3.231290 -0.392056 1.5970900 -0.1756650 -1.125020 1.700380 0.606852
## PC488 PC489 PC490 PC491 PC492 PC493 PC494
## 1 3.802490 -4.42576 -2.72845 7.621920 -4.325580 -0.3197550 3.667250
## 2 -0.273779 4.25418 -2.62627 -8.714900 2.932970 0.0685562 0.559071
## 3 1.703190 2.56347 1.85937 -0.324487 1.980840 1.2350700 -6.113290
## 4 -1.095370 -2.46304 -2.46801 2.913660 -3.140380 0.4899490 -5.079530
## 5 -2.027000 1.03381 3.38786 -5.432900 1.385940 -2.4002500 -3.183000
## 6 -0.680082 2.57602 -1.26008 -2.528290 0.896173 0.3928170 -1.163320
## PC495 PC496 PC497 PC498 PC499 PC500 PC501
## 1 2.152860 5.7872700 -6.75241 4.888950 10.529200 0.598650 5.330280
## 2 -0.160883 -2.1484100 4.78026 -5.771790 -3.471150 0.939116 -2.290020
## 3 6.756230 -0.0277912 1.17952 6.194580 -0.993496 0.656219 4.909960
## 4 4.118470 -1.4353600 2.49865 3.763830 -2.638930 1.726980 -4.746170
## 5 -4.630090 -0.3568730 -1.52424 0.184014 7.265410 5.768670 -0.799854
## 6 -1.031990 0.3377750 2.11723 1.698640 2.682820 -1.489850 2.085970
## PC502 PC503 PC504 PC505 PC506 PC507 PC508 PC509
## 1 1.147590 -6.18349 2.441420 2.589680 0.945649 -0.938806 5.70242 -1.472610
## 2 0.584227 -3.77973 -0.269226 5.746340 -2.132790 -6.135870 -3.78667 -1.808490
## 3 -1.617680 3.19440 -0.131290 -3.426400 2.990390 -1.383630 -1.41255 -1.592690
## 4 -2.249930 6.06621 -5.229590 -3.874520 -0.932163 -4.240970 -1.71753 5.178590
## 5 4.279080 -2.11966 -0.039880 2.324150 -6.198930 0.201677 -1.85836 3.175350
## 6 -0.180856 2.28419 0.209238 -0.384053 0.195751 1.556930 1.70532 -0.573656
## PC510 PC511 PC512 PC513 PC514 PC515 PC516 PC517
## 1 2.49245 -0.170717 -0.65352 -1.421020 -3.11407 -1.79039 -2.01058 0.322580
## 2 -4.53870 6.674060 5.31818 5.727520 -3.67606 -0.36751 -3.44218 -1.265880
## 3 -1.75669 -0.131597 -2.17060 -2.524800 -2.97285 -2.59630 3.07011 3.589120
## 4 7.61641 -0.945926 -7.50184 -0.244245 1.73440 -6.08824 2.38231 1.054690
## 5 1.51179 -4.933460 1.81896 5.225040 3.30415 3.05736 1.43495 -2.695320
## 6 1.69116 -2.429450 1.22643 0.605065 -0.58304 1.19657 1.36024 -0.223375
## PC518 PC519 PC520 PC521 PC522 PC523 PC524
## 1 -0.563165 -2.154070 -0.845365 -0.6483500 1.83221 2.88531 -1.725740
## 2 0.301268 -3.229540 0.873406 1.7891300 -5.46300 7.75819 2.325860
## 3 -0.767775 -0.220158 -4.583340 -3.5858600 -1.04207 -5.68721 -0.233143
## 4 1.791160 1.734350 -2.313860 1.5824700 3.74753 -1.16347 0.521510
## 5 -1.746250 -1.497070 0.906723 2.1055100 -2.84490 -2.15630 0.213521
## 6 -1.645620 -0.315717 -1.094820 -0.0148446 -1.29688 2.53103 0.295172
## PC525 PC526 PC527 PC528 PC529 PC530 PC531
## 1 -2.1186300 1.580550 -0.393534 -3.340450 0.7839050 -4.66696 -3.14469
## 2 -0.0507085 0.426393 -0.383365 -4.232440 2.2558000 3.93817 2.57636
## 3 -1.5023100 -1.351400 -0.664369 0.516294 -0.9451710 3.92352 -1.64968
## 4 0.7228330 -5.733700 7.168400 4.020570 -3.8497600 2.84411 1.62315
## 5 -1.2113900 5.624170 -4.654230 1.694480 -1.3925100 1.42230 -2.14384
## 6 0.0801718 -0.170995 0.501421 0.400535 -0.0388899 -0.12602 -0.49322
## PC532 PC533 PC534 PC535 PC536 PC537 PC538
## 1 3.405020 -1.023090 6.971360 3.084210 0.583564 -1.490980 -5.154550
## 2 -6.307960 1.694060 -6.513580 -1.381010 -7.136960 -2.431020 -2.209950
## 3 0.736933 -0.884715 -6.036790 -11.649600 7.088760 2.491950 3.449040
## 4 -1.267600 -1.954300 -1.052460 3.190740 1.285960 -0.221289 0.805661
## 5 2.102560 -1.143910 0.266658 -2.888100 2.042110 -0.211161 0.818581
## 6 -0.181220 1.334190 -1.171840 0.729099 -0.210794 0.856780 -0.167919
## PC539 PC540 PC541 PC542 PC543 PC544 PC545
## 1 -2.207470 0.758238 -3.335490 5.4590000 1.221180 1.63972000 4.427280
## 2 -0.911507 -1.389160 3.149200 -3.8821200 -3.431940 -6.08617000 -4.603970
## 3 3.543010 -1.516680 1.850980 -1.5179400 -0.407301 1.66806000 -7.206650
## 4 -2.818540 -1.144380 -2.341230 -7.4721300 -2.596550 2.33664000 -3.276970
## 5 -1.459910 -2.492320 1.155440 0.0787301 3.107540 -0.00789298 0.538279
## 6 0.139411 0.726248 0.900952 2.1527200 -1.238480 0.32846000 0.776762
## PC546 PC547 PC548 PC549 PC550 PC551 PC552
## 1 -0.592321 -3.950480 5.398870 -0.259369 1.346980 -3.0258500 2.398070
## 2 0.868614 -0.776384 -3.824660 2.338640 0.631417 -1.0236800 3.124520
## 3 4.242580 2.912100 -2.691830 -5.502290 1.836520 -1.5376700 -7.697860
## 4 0.600199 4.452940 -1.611770 -0.782176 1.684540 0.0141803 -2.459820
## 5 -0.293807 3.026430 -6.600290 -1.351960 0.712053 0.9871010 -0.721727
## 6 2.176100 -0.281714 -0.211135 1.887900 0.134043 -1.3467200 -1.413170
## PC553 PC554 PC555 PC556 PC557 PC558 PC559
## 1 -0.286456 -0.0663779 3.476500 -1.351740 -1.64359000 -0.778570 -1.306260
## 2 1.589360 -1.7513600 -3.728780 -0.730756 -0.00663453 -0.484378 1.384330
## 3 -7.170830 2.0107000 -9.569970 3.974570 3.56960000 8.585820 1.739970
## 4 -6.107070 1.8487100 2.812830 3.488180 0.66171100 -1.753540 -2.556800
## 5 -0.694254 0.4203220 -6.285140 1.427040 2.70552000 0.324602 -0.267143
## 6 -1.441920 -0.3393800 -0.212959 1.152500 -0.77473400 0.450112 -1.014180
## PC560 PC561 PC562 PC563 PC564 PC565 PC566
## 1 2.825240 0.0995965 1.57620 -3.60296 0.670159 -3.209690 1.550600
## 2 -2.489070 -4.0515200 -3.70281 6.60162 -2.458920 6.003580 -7.124020
## 3 -1.748750 2.9037700 2.62248 -6.01691 1.100880 -4.439560 5.372880
## 4 -4.708210 2.8882200 -1.32227 2.67467 2.624540 0.743727 2.569070
## 5 0.653901 -0.5110950 1.18163 -2.79985 -1.797680 0.198578 -3.013150
## 6 -1.833120 -1.2206000 -1.99551 1.19657 0.685755 0.234363 -0.634971
## PC567 PC568 PC569 PC570 PC571 PC572 PC573
## 1 0.989093 -0.459252 0.975044 2.73825 1.134910 -2.30975 0.2222010
## 2 -0.929648 -3.040190 0.822756 -1.69767 -1.093300 -2.08528 -0.0283054
## 3 1.996950 6.582050 0.692111 -2.92630 1.409960 6.58744 -1.6235900
## 4 0.980353 -3.243020 5.102860 -2.73461 0.253183 3.93572 -0.1543540
## 5 -1.986100 4.835600 -1.979180 -2.03141 0.057754 -4.35120 -0.0373671
## 6 -0.800821 -0.265115 -1.446920 -1.63397 -0.565600 2.08011 -0.6737900
## PC574 PC575 PC576 PC577 PC578 PC579 PC580
## 1 1.188210 0.870558 -2.113010 1.37526 0.000623485 2.240530 -0.459305
## 2 -3.579570 3.242020 0.369332 -3.67347 -2.681540000 -3.973060 0.369044
## 3 0.950265 -0.902675 -3.054820 1.31367 2.586150000 0.692345 1.026450
## 4 2.626630 -0.128662 6.096140 1.26551 0.796442000 -0.817054 -1.224270
## 5 -0.530347 -0.572138 -1.720590 1.32734 -0.445303000 1.693790 0.569703
## 6 -0.331061 -0.566017 0.365110 -0.71432 -0.226785000 0.743208 1.396390
## PC581 PC582 PC583 PC584 PC585 PC586 PC587
## 1 0.900178 -0.975244 1.351650 -0.764878 0.411443 -3.608420 0.816712
## 2 0.230988 -0.649956 -1.625020 3.628170 -3.325320 1.478650 1.625470
## 3 -3.870950 3.023630 2.915330 -1.153350 2.061790 0.483450 -5.288950
## 4 -0.117613 1.489090 2.486810 -0.658991 -0.150000 -1.361030 -2.080040
## 5 2.659190 2.698140 0.107061 0.738584 0.576776 2.061340 -0.933789
## 6 -0.741153 -0.478477 -1.091250 0.432916 0.536083 -0.676711 -0.147207
## PC588 PC589 PC590 PC591 PC592 PC593 PC594
## 1 0.0384876 -2.219890 -0.408084 -1.612930 -1.4117400 -0.2201720 1.179640
## 2 2.5240200 1.151330 -0.354449 -0.381923 -0.5873530 0.6151750 1.015280
## 3 -7.6461100 0.354336 2.357310 5.889990 0.2842930 1.6530400 -0.253811
## 4 -1.0604400 -0.817802 2.861370 4.752060 -1.3637300 -0.4553330 0.113574
## 5 -0.3504120 -1.044770 -0.397141 0.717417 -1.2142500 0.0818215 0.682315
## 6 -0.7484500 0.578017 1.178260 1.426280 -0.0826511 -1.7031100 -0.406109
## PC595 PC596 PC597 PC598 PC599 PC600 PC601
## 1 -0.0430501 -0.664985 -0.157422 0.0947256 2.419170 0.6925310 0.0561172
## 2 -2.9241400 1.393060 1.208010 -0.6697120 -1.009420 0.6709490 -1.2857100
## 3 3.9703000 2.300020 -3.190190 1.5798000 0.306442 -3.5776800 -0.9908110
## 4 -0.7069660 -2.730620 -1.374650 0.4564930 -1.200290 -1.1788900 1.8492300
## 5 1.2877000 -2.035230 -1.534000 -0.2660950 1.271710 -0.0783648 -3.0542800
## 6 2.1880700 1.462530 -1.806420 0.0631933 -0.662056 1.1882700 -0.9711610
## PC602 PC603 PC604 PC605 PC606 PC607 PC608
## 1 -0.422698 1.2217300 1.1282100 -0.0968164 0.883596 -1.851110 0.2777940
## 2 2.119190 -0.2993870 1.1771100 3.2232000 1.750800 -0.367609 -0.0249427
## 3 -4.576820 -0.8257750 1.2293200 -3.1137200 -0.268914 3.724350 -1.6254500
## 4 0.316105 -0.5618170 -0.1933880 -0.6756260 0.588500 1.240880 0.6906870
## 5 -2.893920 -0.8409080 0.0943487 -1.0693300 0.974841 -0.101900 -1.5808500
## 6 0.217015 -0.0314555 1.2089000 -0.0546657 -0.414050 -1.478620 0.2828550
## PC609 PC610 PC611 PC612 PC613 PC614 PC615
## 1 0.9498830 0.482229 0.0376125 0.1797420 1.8884100 -1.172590 0.0356375
## 2 0.1552650 -0.364791 -1.1793100 -0.9907910 -0.4088200 0.901477 0.1978340
## 3 -1.4347400 2.588180 2.9292400 -0.8670590 -0.0932232 1.349220 -1.0828100
## 4 0.0549793 -0.170540 -1.0536100 0.1080440 -2.1936100 3.458960 1.8271800
## 5 -0.8816780 0.193019 0.1477220 0.0441914 1.2076800 0.291053 -0.6286800
## 6 -1.0314900 1.159970 -0.5925440 0.4074700 -0.9531180 -0.382572 -0.0261295
## PC616 PC617 PC618 PC619 PC620 PC621 PC622
## 1 -1.2742200 -1.0166500 1.017700 0.413055 -0.615057 0.933675 0.561719
## 2 0.0792599 -0.2762350 0.377416 -1.354380 -1.201900 -0.260239 -0.613003
## 3 0.9567240 1.8708200 1.603800 2.505120 0.409465 -1.091240 -1.182830
## 4 0.7979200 1.3687000 -2.398750 2.090690 -1.684470 0.237746 1.055300
## 5 0.0248111 0.7346960 -0.984834 -0.284761 0.104881 -0.605211 1.480840
## 6 1.2182000 -0.0674913 -0.194666 -0.839569 -0.573405 0.038825 0.137123
## PC623 PC624 PC625 PC626 PC627 PC628 PC629
## 1 0.1222350 -1.655880 -1.0240800 -0.398773 -0.372284 0.479921 -0.0538361
## 2 0.7523290 1.770930 0.9077600 -0.342363 -1.178260 -1.118970 -0.8612290
## 3 0.4812560 0.754075 -1.5656300 0.225430 0.780495 0.447120 -0.1822280
## 4 1.0049600 -0.483412 1.6803900 -0.427996 0.511488 0.817202 -0.3105760
## 5 0.9491250 0.715926 -0.3017880 1.305980 0.541594 1.282800 0.2009910
## 6 -0.0150364 -1.275430 -0.0555473 -0.887926 0.135793 0.881540 1.3025200
## PC630 PC631 PC632 PC633 PC634 PC635 PC636
## 1 -0.6730140 1.329650 0.654511 -0.4903520 -0.3914520 -0.3719210 -0.9230830
## 2 0.0548119 -1.407510 -0.869820 1.1010700 -1.0294800 0.5386230 0.5480640
## 3 0.5820030 0.347856 0.200176 -0.3495600 1.3099300 -0.0265796 -0.5817040
## 4 -0.1259030 -1.563200 -0.721731 0.1302550 0.6358970 0.2288740 -1.1053800
## 5 0.5597680 0.776038 -0.203955 1.5193500 -0.0816816 -1.5026400 0.0457431
## 6 0.5599610 -0.573395 0.917824 -0.0270427 1.3210200 0.6686640 -2.0031600
## PC637 PC638 PC639 PC640 PC641 PC642 PC643
## 1 -0.545903 -0.2611980 0.663981 0.148466 -1.352060 0.534462 -0.0320718
## 2 1.160670 -0.6517060 -1.015110 -0.414513 0.627800 -0.762995 0.5611680
## 3 -0.377351 1.2528800 1.005600 -0.662310 0.233966 -0.474998 0.5785380
## 4 -0.285090 -0.0676641 -0.187437 -2.525190 0.409769 -1.207560 -0.3914100
## 5 1.077120 -0.3668680 -0.679117 0.429045 0.409687 0.160206 0.1937180
## 6 -0.929517 -1.1370400 -1.731310 0.106299 -2.042490 0.202262 -1.0385800
## PC644 PC645 PC646 PC647 PC648 PC649 PC650
## 1 0.695318 -0.5732730 1.419190 -0.376348 0.7357110 -0.8545110 -0.3704340
## 2 0.425374 -0.7022600 -0.923579 0.105626 0.2854380 -0.2521310 0.9802470
## 3 -0.291940 0.6470010 -0.476133 -0.762751 -1.1635700 -0.0828889 -0.0932712
## 4 1.453290 0.0768286 -1.272050 -0.611429 0.4130000 0.2926600 -0.2423210
## 5 0.215671 -0.0764362 0.256408 0.724395 0.0163036 -1.2054800 0.2619800
## 6 -0.760264 -2.0082000 2.892230 2.303850 -2.2543100 -0.8165350 -0.6895360
## PC651 PC652 PC653 PC654 PC655 PC656 PC657
## 1 -0.29158400 -0.956123 -0.972026 -0.877605 -1.1494000 0.829165 -0.0530470
## 2 -0.00710967 1.328450 -1.379670 0.160955 0.2285950 -0.466127 0.0432664
## 3 0.50577700 -0.142748 0.781087 0.590896 1.3880000 -0.124747 0.3939730
## 4 0.89601900 0.405259 0.029616 0.808107 0.0401254 -0.356069 -0.0146783
## 5 -0.26726200 -0.468042 0.183892 1.217820 0.2050710 -0.709634 -0.9518690
## 6 1.56279000 -1.084490 -1.147930 -3.845710 3.3993000 3.065770 1.6460700
## PC658 PC659 PC660 PC661 PC662 PC663 PC664
## 1 0.00773703 -0.5788180 0.4602070 0.0772628 -0.302533 -0.457385 -0.0492801
## 2 0.58495000 0.6189400 -0.1186510 0.5637610 -0.520939 0.256424 -1.5277700
## 3 0.23331200 -0.3534350 0.6300210 0.1764910 0.155248 -0.325883 0.2595370
## 4 -0.17203300 -0.0365733 -0.1475710 -0.2947840 0.147316 -0.172945 0.7324470
## 5 -0.61314100 0.0015057 -0.1592740 0.3222340 0.147858 0.313532 0.1500250
## 6 1.63719000 1.7095000 0.0199371 -3.7314300 8.424690 -6.823410 0.6166360
## PC665 PC666 PC667 PC668 PC669 PC670 PC671
## 1 -0.0482858 -1.071640 0.307679 0.517415 -0.247526 -0.2724220 0.0570314
## 2 0.4070540 0.978048 0.505680 0.441660 1.310400 0.2838230 0.3236680
## 3 0.1722620 0.860721 0.116949 0.344689 0.648392 0.0017265 0.1882100
## 4 0.0770676 -0.702335 -0.113037 -0.185621 -0.649254 0.1925390 0.0629604
## 5 -0.1966550 -0.104446 0.168875 0.373712 0.356984 0.2672210 -0.2675250
## 6 2.2520500 -9.838060 -10.543700 -2.177630 -18.570900 -4.9561400 -10.5035000
## PC672 PC673 PC674 PC675 PC676 PC677 PC678
## 1 0.3658390 -0.2220490 -0.4542430 -0.1059320 0.0668257 -0.1227860 -0.07169060
## 2 -0.3572560 0.3813790 -0.0584039 0.3682240 -0.0438910 -0.2437420 -0.62563900
## 3 0.0525116 1.1491300 -0.0821967 0.0657877 -0.3136410 1.0217800 -0.12046900
## 4 0.4529030 0.1608160 0.0197362 -0.6737840 -0.4331220 0.0589937 0.28747700
## 5 -0.3019860 0.1766060 0.2178910 0.6849810 0.3028740 -0.8538600 -0.00102422
## 6 1.3322600 0.0338846 -2.9810000 -1.6761600 1.4657700 -5.6600100 1.48465000
## PC679 PC680 PC681 PC682 PC683 PC684 PC685
## 1 -0.1012020 0.0386973 0.2632050 0.1172300 0.3892970 0.0890863 0.3201930
## 2 -0.1120630 -0.0705258 -0.0886319 -0.1414360 0.0814943 -0.0653639 0.1095060
## 3 0.0830344 0.2721590 -0.3359110 -0.3258950 -0.0744264 -0.1506270 -0.1918520
## 4 -0.1225620 0.1521610 -0.1445910 0.0501605 -0.2175600 -0.0702993 -0.0326227
## 5 0.3900610 -0.3917050 -0.4784030 0.1387840 -0.2413210 -0.1031130 0.1312310
## 6 -0.3688850 0.3400610 0.4379240 0.9850670 -0.5071560 -0.6654800 -0.7090970
## PC686 PC687 PC688 Individual Pop_City Country Latitude
## 1 0.0385472 0.1949580 -6.61228e-07 801 Durres Albania 41.29704
## 2 -0.2263910 -0.0350247 -6.61228e-07 802 Durres Albania 41.29704
## 3 0.3104160 -0.3268830 -6.61228e-07 803 Durres Albania 41.29704
## 4 -0.1771030 0.0535150 -6.61228e-07 804 Durres Albania 41.29704
## 5 0.0731578 0.0622075 -6.61228e-07 805 Durres Albania 41.29704
## 6 0.6414640 0.0648584 -6.61228e-07 806 Durres Albania 41.29704
## Longitude Continent Year Region Subregion order order2 orderold
## 1 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 2 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 3 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 4 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 5 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 6 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "PCA_lea_r0.01_colors_pc1_pc2.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 & 3
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:30,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","lea_cross_entropy_euro_global_r01.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 5 5 5 5 5
## without cross-entropy 0 0 0 0 0
## total 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9741476 0.9478415 0.9371094 0.9306857 0.9274846 0.9252483 0.9230460
## mean 0.9744821 0.9482057 0.9373050 0.9308620 0.9287969 0.9258323 0.9234635
## max 0.9747943 0.9484720 0.9374630 0.9310598 0.9292955 0.9268456 0.9238938
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.9210405 0.9194040 0.9178541 0.9161954 0.9157153 0.9143805 0.9134463
## mean 0.9216164 0.9197455 0.9185914 0.9166938 0.9160336 0.9148880 0.9137159
## max 0.9220569 0.9200141 0.9191938 0.9177154 0.9167792 0.9154233 0.9140798
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.9130960 0.9125347 0.9122265 0.9114823 0.9112280 0.9108308 0.9105371
## mean 0.9133830 0.9129528 0.9124338 0.9118073 0.9117729 0.9114267 0.9109761
## max 0.9138815 0.9136672 0.9128994 0.9121939 0.9123163 0.9122307 0.9115720
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.9103878 0.9102019 0.9103990 0.9106880 0.9104110 0.9109080 0.9106481
## mean 0.9108562 0.9110041 0.9108721 0.9109012 0.9111984 0.9110827 0.9113020
## max 0.9114802 0.9114443 0.9113352 0.9111545 0.9122130 0.9113573 0.9119994
## K = 29 K = 30
## min 0.9109183 0.9104377
## mean 0.9113406 0.9115078
## max 0.9116957 0.9122739
# get the cross-entropy of all runs for K = 20
ce = cross.entropy(project, K = 20)
ce #run 1 is best for k=20
## K = 20
## run 1 0.9108308
## run 2 0.9122307
## run 3 0.9114574
## run 4 0.9113974
## run 5 0.9112171
color_palette_20 <-
c(
"#FF8C1A",
"yellow2",
"#77DD77",
"#1E90FF",
"#B22222",
"#AE8333",
"#B20CC9",
"#F49AC2",
"blue",
"#008080",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"green",
"purple",
"orangered"
)
Default plot
# select the best run for chosen K
best = which.min(cross.entropy(project, K = 20))
# best is run 1
barchart(project, K = 20, run = best,
border = NA, space = 0,
col = color_palette_20,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
using ggplot
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 20, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_20)
Extract ancestry coefficients for k=20
change to correct matrix
leak20 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.snmf/K20/run1/r2_0.01_r1.20.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak20)
## # A tibble: 6 × 20
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0138 0.000100 0.0918 2.83e-3 1.37e-2 2.21e-2 1.39e-2 5.12e-3 3.41e-2
## 2 0.00808 0.0383 0.0118 7.02e-2 4.34e-2 2.00e-2 3.24e-3 4.22e-2 3.28e-2
## 3 0.00231 0.0000998 0.0000998 9.98e-5 9.98e-5 8.77e-4 9.98e-5 9.98e-5 9.98e-5
## 4 0.0000999 0.0000999 0.0000999 3.33e-2 3.03e-2 9.99e-5 3.04e-2 9.99e-5 9.99e-5
## 5 0.00413 0.0000999 0.0000999 3.63e-2 3.15e-2 9.99e-5 9.99e-5 9.99e-5 1.05e-2
## 6 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 11 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 1.38063e-02 9.99550e-05 9.18430e-02 2.82863e-03 1.37334e-02
## 2 1002 OKI 8.07866e-03 3.83354e-02 1.18183e-02 7.02198e-02 4.33844e-02
## 3 1003 OKI 2.31431e-03 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.99010e-05 3.32538e-02 3.02918e-02
## 5 1005 OKI 4.12997e-03 9.99010e-05 9.99010e-05 3.62772e-02 3.14710e-02
## 6 1006 OKI 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## X6 X7 X8 X9 X10 X11
## 1 2.20642e-02 1.38697e-02 5.11567e-03 3.40829e-02 9.99550e-05 0.527942
## 2 1.99984e-02 3.24241e-03 4.22411e-02 3.27885e-02 4.31895e-02 0.324555
## 3 8.77439e-04 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 0.995111
## 4 9.99010e-05 3.03931e-02 9.99010e-05 9.99010e-05 9.99010e-05 0.818280
## 5 9.99010e-05 9.99010e-05 9.99010e-05 1.05491e-02 9.99010e-05 0.874483
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 0.998103
## X12 X13 X14 X15 X16 X17
## 1 6.55197e-02 3.95934e-02 9.99550e-05 2.97411e-02 9.61370e-03 9.99550e-05
## 2 1.24613e-01 2.85697e-02 1.27235e-01 9.99730e-05 1.24154e-02 9.99730e-05
## 3 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 2.45144e-02 3.72545e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 2.76394e-03 1.00913e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## X18 X19 X20
## 1 9.99550e-05 5.70084e-02 7.27377e-02
## 2 9.99730e-05 4.37211e-02 2.52939e-02
## 3 9.98470e-05 9.98470e-05 9.98470e-05
## 4 9.09715e-03 1.28061e-02 3.01026e-03
## 5 1.32808e-02 9.99010e-05 1.58546e-02
## 6 9.98289e-05 9.98289e-05 9.98289e-05
Rename the columns
# Rename the columns starting from the third one
leak20 <- leak20 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak20)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 1.38063e-02 9.99550e-05 9.18430e-02 2.82863e-03 1.37334e-02
## 2 1002 OKI 8.07866e-03 3.83354e-02 1.18183e-02 7.02198e-02 4.33844e-02
## 3 1003 OKI 2.31431e-03 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 1004 OKI 9.99010e-05 9.99010e-05 9.99010e-05 3.32538e-02 3.02918e-02
## 5 1005 OKI 4.12997e-03 9.99010e-05 9.99010e-05 3.62772e-02 3.14710e-02
## 6 1006 OKI 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## v6 v7 v8 v9 v10 v11
## 1 2.20642e-02 1.38697e-02 5.11567e-03 3.40829e-02 9.99550e-05 0.527942
## 2 1.99984e-02 3.24241e-03 4.22411e-02 3.27885e-02 4.31895e-02 0.324555
## 3 8.77439e-04 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 0.995111
## 4 9.99010e-05 3.03931e-02 9.99010e-05 9.99010e-05 9.99010e-05 0.818280
## 5 9.99010e-05 9.99010e-05 9.99010e-05 1.05491e-02 9.99010e-05 0.874483
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 0.998103
## v12 v13 v14 v15 v16 v17
## 1 6.55197e-02 3.95934e-02 9.99550e-05 2.97411e-02 9.61370e-03 9.99550e-05
## 2 1.24613e-01 2.85697e-02 1.27235e-01 9.99730e-05 1.24154e-02 9.99730e-05
## 3 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05 9.98470e-05
## 4 2.45144e-02 3.72545e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 2.76394e-03 1.00913e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 6 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05 9.98289e-05
## v18 v19 v20
## 1 9.99550e-05 5.70084e-02 7.27377e-02
## 2 9.99730e-05 4.37211e-02 2.52939e-02
## 3 9.98470e-05 9.98470e-05 9.98470e-05
## 4 9.09715e-03 1.28061e-02 3.01026e-03
## 5 1.32808e-02 9.99010e-05 1.58546e-02
## 6 9.98289e-05 9.98289e-05 9.98289e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak20 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_20 <-
c(
"#B20CC9",
"#1E90FF",
"purple",
"#FF8C1A",
"#AE8333",
"#B22222",
"yellow2",
"#F49AC2",
"orangered",
"magenta",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"#008080",
"green4",
"navy",
"green",
"#77DD77",
"blue"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=20.\n LEA inference for k20 with 19,318 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_20) +
expand_limits(y = c(0, 1.5))
Default plot for K=7
# select the best run for chosen K
best = which.min(cross.entropy(project, K = 7))
# best is run 1
barchart(project, K = 7, run = best,
border = NA, space = 0,
col = color_palette_20,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
using ggplot
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_20)
leak7 <- read_delim(
here("euro_global/output/snps_sets/r2_0.01.snmf/K7/run4/r2_0.01_r4.7.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak7)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0978 0.0358 0.0341 0.0636 0.549 0.0292 0.191
## 2 0.0615 0.0957 0.0593 0.0430 0.528 0.0461 0.167
## 3 0.0690 0.0360 0.000100 0.000100 0.696 0.0671 0.132
## 4 0.0299 0.102 0.0296 0.000100 0.639 0.00764 0.192
## 5 0.0654 0.0784 0.000100 0.000100 0.663 0.0105 0.183
## 6 0.0441 0.0539 0.000100 0.000100 0.705 0.0506 0.146
The fam file
fam_file <- here(
"euro_global/output/snps_sets/r2_0.01.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.0978103 0.0357826 0.034129800 0.063596800 0.548841 0.02919780
## 2 1002 OKI 0.0615106 0.0957283 0.059314700 0.042975600 0.527767 0.04608960
## 3 1003 OKI 0.0690331 0.0360201 0.000099982 0.000099982 0.696111 0.06705730
## 4 1004 OKI 0.0298846 0.1018460 0.029630700 0.000099991 0.638919 0.00763714
## 5 1005 OKI 0.0654267 0.0784264 0.000099982 0.000099982 0.662766 0.01046610
## 6 1006 OKI 0.0440915 0.0538924 0.000099982 0.000099982 0.704794 0.05057510
## X7
## 1 0.190641
## 2 0.166615
## 3 0.131579
## 4 0.191983
## 5 0.182715
## 6 0.146447
Rename the columns
# Rename the columns starting from the third one
leak7 <- leak7 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak7)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.0978103 0.0357826 0.034129800 0.063596800 0.548841 0.02919780
## 2 1002 OKI 0.0615106 0.0957283 0.059314700 0.042975600 0.527767 0.04608960
## 3 1003 OKI 0.0690331 0.0360201 0.000099982 0.000099982 0.696111 0.06705730
## 4 1004 OKI 0.0298846 0.1018460 0.029630700 0.000099991 0.638919 0.00763714
## 5 1005 OKI 0.0654267 0.0784264 0.000099982 0.000099982 0.662766 0.01046610
## 6 1006 OKI 0.0440915 0.0538924 0.000099982 0.000099982 0.704794 0.05057510
## v7
## 1 0.190641
## 2 0.166615
## 3 0.131579
## 4 0.191983
## 5 0.182715
## 6 0.146447
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Using ggplot2 for individual admixtures
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak7 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_7 <-
c(
"#00FF00",
"#FFFF00",
"#FF0000",
"#0000FF",
"#FF00FF",
"orange",
"purple"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 10
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for k7 with 19,318 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_7) +
expand_limits(y = c(0, 1.5))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 12, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:12)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_20)
leak12 <- read_delim(
here("euro_global/output/snps_sets/r2_0.01.snmf/K12/run4/r2_0.01_r4.12.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak12)
## # A tibble: 6 × 12
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4.02e-2 1.15e-2 1.06e-2 1.00e-4 2.31e-2 6.51e-2 0.619 1.10e-1 3.78e-3 5.29e-2
## 2 7.60e-2 2.78e-2 5.41e-2 1.00e-4 1.50e-2 5.90e-2 0.499 1.03e-1 3.09e-2 8.21e-2
## 3 9.99e-5 8.91e-3 9.99e-5 6.68e-3 1.82e-3 3.18e-3 0.979 9.99e-5 2.54e-4 9.99e-5
## 4 1.42e-2 2.87e-2 2.45e-2 9.99e-5 9.99e-5 6.56e-3 0.884 9.99e-5 9.99e-5 9.99e-5
## 5 1.80e-2 2.77e-2 5.17e-3 2.40e-2 1.00e-4 2.92e-4 0.896 1.00e-4 1.00e-4 1.00e-4
## 6 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5
## # ℹ 2 more variables: X11 <dbl>, X12 <dbl>
The fam file
fam_file <- here(
"euro_global/output/snps_sets/r2_0.01.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 0.040199900 0.011530500 0.010638500 0.000099982 0.023146800
## 2 1002 OKI 0.075992800 0.027784300 0.054107900 0.000099991 0.014999300
## 3 1003 OKI 0.000099946 0.008908350 0.000099946 0.006684460 0.001824840
## 4 1004 OKI 0.014216000 0.028685700 0.024504700 0.000099946 0.000099946
## 5 1005 OKI 0.018042700 0.027702800 0.005169910 0.024000300 0.000099964
## 6 1006 OKI 0.000099901 0.000099901 0.000099901 0.000099901 0.000099901
## X6 X7 X8 X9 X10 X11
## 1 0.065050400 0.618872 0.109740000 0.003775240 0.052941900 0.063905500
## 2 0.059005000 0.498837 0.103242000 0.030899000 0.082053100 0.029348300
## 3 0.003175130 0.978554 0.000099946 0.000254015 0.000099946 0.000099946
## 4 0.006562450 0.883594 0.000099946 0.000099946 0.000099946 0.000099946
## 5 0.000292443 0.896372 0.000099964 0.000099964 0.000099964 0.027566900
## 6 0.000099901 0.998901 0.000099901 0.000099901 0.000099901 0.000099901
## X12
## 1 0.000099982
## 2 0.023630600
## 3 0.000099946
## 4 0.041837800
## 5 0.000453241
## 6 0.000099901
Rename the columns
# Rename the columns starting from the third one
leak12 <- leak12 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak12)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 0.040199900 0.011530500 0.010638500 0.000099982 0.023146800
## 2 1002 OKI 0.075992800 0.027784300 0.054107900 0.000099991 0.014999300
## 3 1003 OKI 0.000099946 0.008908350 0.000099946 0.006684460 0.001824840
## 4 1004 OKI 0.014216000 0.028685700 0.024504700 0.000099946 0.000099946
## 5 1005 OKI 0.018042700 0.027702800 0.005169910 0.024000300 0.000099964
## 6 1006 OKI 0.000099901 0.000099901 0.000099901 0.000099901 0.000099901
## v6 v7 v8 v9 v10 v11
## 1 0.065050400 0.618872 0.109740000 0.003775240 0.052941900 0.063905500
## 2 0.059005000 0.498837 0.103242000 0.030899000 0.082053100 0.029348300
## 3 0.003175130 0.978554 0.000099946 0.000254015 0.000099946 0.000099946
## 4 0.006562450 0.883594 0.000099946 0.000099946 0.000099946 0.000099946
## 5 0.000292443 0.896372 0.000099964 0.000099964 0.000099964 0.027566900
## 6 0.000099901 0.998901 0.000099901 0.000099901 0.000099901 0.000099901
## v12
## 1 0.000099982
## 2 0.023630600
## 3 0.000099946
## 4 0.041837800
## 5 0.000453241
## 6 0.000099901
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Using ggplot2 for individual admixtures
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak12 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_12 <-
c(
"#FF8C1A",
"#1E90FF",
"#B22222",
"green",
"yellow2",
"blue",
"#75FAFF",
"magenta",
"green4",
"navy",
"purple",
"orangered"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:12)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_12[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 10
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=12.\n LEA inference for k12 with 19,318 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_12) +
expand_limits(y = c(0, 1.5))
Try plotting k=5 (See if the 5 original Asian clusters hold)
Extract ancestry coefficients for k=5
Summary of project
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 5 5 5 5 5
## without cross-entropy 0 0 0 0 0
## total 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9741476 0.9478415 0.9371094 0.9306857 0.9274846 0.9252483 0.9230460
## mean 0.9744821 0.9482057 0.9373050 0.9308620 0.9287969 0.9258323 0.9234635
## max 0.9747943 0.9484720 0.9374630 0.9310598 0.9292955 0.9268456 0.9238938
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.9210405 0.9194040 0.9178541 0.9161954 0.9157153 0.9143805 0.9134463
## mean 0.9216164 0.9197455 0.9185914 0.9166938 0.9160336 0.9148880 0.9137159
## max 0.9220569 0.9200141 0.9191938 0.9177154 0.9167792 0.9154233 0.9140798
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.9130960 0.9125347 0.9122265 0.9114823 0.9112280 0.9108308 0.9105371
## mean 0.9133830 0.9129528 0.9124338 0.9118073 0.9117729 0.9114267 0.9109761
## max 0.9138815 0.9136672 0.9128994 0.9121939 0.9123163 0.9122307 0.9115720
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.9103878 0.9102019 0.9103990 0.9106880 0.9104110 0.9109080 0.9106481
## mean 0.9108562 0.9110041 0.9108721 0.9109012 0.9111984 0.9110827 0.9113020
## max 0.9114802 0.9114443 0.9113352 0.9111545 0.9122130 0.9113573 0.9119994
## K = 29 K = 30
## min 0.9109183 0.9104377
## mean 0.9113406 0.9115078
## max 0.9116957 0.9122739
# get the cross-entropy of all runs for K = 5
ce = cross.entropy(project, K = 5)
ce #run 3 is best for k=5
## K = 5
## run 1 0.9289309
## run 2 0.9292691
## run 3 0.9274846
## run 4 0.9290042
## run 5 0.9292955
choose best run here
leak5 <- read_delim(
here("euro_global/output/snps_sets/r2_0.01.snmf/K5/run3/r2_0.01_r3.5.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak5)
## # A tibble: 6 × 5
## X1 X2 X3 X4 X5
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.201 0.128 0.212 0.420 0.0405
## 2 0.200 0.156 0.168 0.405 0.0716
## 3 0.162 0.148 0.172 0.478 0.0391
## 4 0.234 0.100 0.207 0.444 0.0150
## 5 0.197 0.124 0.214 0.462 0.00281
## 6 0.202 0.146 0.198 0.438 0.0151
The fam file
fam_file <- here(
"euro_global/output/snps_sets/r2_0.01.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 0.200653 0.1276200 0.211680 0.419503 0.04054400
## 2 1002 OKI 0.200195 0.1560630 0.167533 0.404649 0.07155990
## 3 1003 OKI 0.162190 0.1481010 0.172349 0.478278 0.03908180
## 4 1004 OKI 0.233547 0.0999538 0.207488 0.444058 0.01495440
## 5 1005 OKI 0.196883 0.1244250 0.214223 0.461663 0.00280657
## 6 1006 OKI 0.202352 0.1460070 0.198391 0.438121 0.01512950
Rename the columns
# Rename the columns starting from the third one
leak5 <- leak5 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak5)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 0.200653 0.1276200 0.211680 0.419503 0.04054400
## 2 1002 OKI 0.200195 0.1560630 0.167533 0.404649 0.07155990
## 3 1003 OKI 0.162190 0.1481010 0.172349 0.478278 0.03908180
## 4 1004 OKI 0.233547 0.0999538 0.207488 0.444058 0.01495440
## 5 1005 OKI 0.196883 0.1244250 0.214223 0.461663 0.00280657
## 6 1006 OKI 0.202352 0.1460070 0.198391 0.438121 0.01512950
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Default Plot
color_palette_5 <-
c(
"#FF8C1A",
"#77DD37",
"#1E90FF",
"purple3",
"#FFFF19"
)
# select the best run for K = 15 clusters
best = which.min(cross.entropy(project, K = 5))
# best is run 4
barchart(project, K = 5, run = best,
border = NA, space = 0,
col = color_palette_5,
xlab = "Individuals",
ylab = "Ancestry proportions",
main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
labels = bp$order, las=1,
cex.axis = .4)
using ggplot
color_palette_5 <-
c(
"#FF8C1A",
"#77DD37",
"#1E90FF",
"purple3",
"#FFFF19"
)
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 5, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:5)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_5[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=5.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_5)
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak5 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_5 <-
c(
"#FF8C1A",
"#FFFF19",
"#1E90FF",
"purple3",
"#77DD37"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:5)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_5[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 10
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k5 with 19,318 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_5) +
expand_limits(y = c(0, 1.5))
for r2<0.01 (Set 1)
Summary of project check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 26 K = 27 K = 28 K = 29 K = 30
## with cross-entropy 5 5 5 5 5
## without cross-entropy 0 0 0 0 0
## total 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9741476 0.9478415 0.9371094 0.9306857 0.9274846 0.9252483 0.9230460
## mean 0.9744821 0.9482057 0.9373050 0.9308620 0.9287969 0.9258323 0.9234635
## max 0.9747943 0.9484720 0.9374630 0.9310598 0.9292955 0.9268456 0.9238938
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.9210405 0.9194040 0.9178541 0.9161954 0.9157153 0.9143805 0.9134463
## mean 0.9216164 0.9197455 0.9185914 0.9166938 0.9160336 0.9148880 0.9137159
## max 0.9220569 0.9200141 0.9191938 0.9177154 0.9167792 0.9154233 0.9140798
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.9130960 0.9125347 0.9122265 0.9114823 0.9112280 0.9108308 0.9105371
## mean 0.9133830 0.9129528 0.9124338 0.9118073 0.9117729 0.9114267 0.9109761
## max 0.9138815 0.9136672 0.9128994 0.9121939 0.9123163 0.9122307 0.9115720
## K = 22 K = 23 K = 24 K = 25 K = 26 K = 27 K = 28
## min 0.9103878 0.9102019 0.9103990 0.9106880 0.9104110 0.9109080 0.9106481
## mean 0.9108562 0.9110041 0.9108721 0.9109012 0.9111984 0.9110827 0.9113020
## max 0.9114802 0.9114443 0.9113352 0.9111545 0.9122130 0.9113573 0.9119994
## K = 29 K = 30
## min 0.9109183 0.9104377
## mean 0.9113406 0.9115078
## max 0.9116957 0.9122739
# get the cross-entropy of all runs for K = 16
ce = cross.entropy(project, K = 16)
ce #run 3 is best for k=
## K = 16
## run 1 0.9128210
## run 2 0.9136672
## run 3 0.9125347
## run 4 0.9126670
## run 5 0.9130741
color_palette_16 <-
c(
"#FF8C1A",
"#B20CC9",
"#77DD77",
"#1E90FF",
"#B22222",
"green",
"yellow2",
"#F49AC2",
"blue",
"#FFFF99",
"#75FAFF",
"magenta",
"green4",
"navy",
"purple",
"orangered"
)
using ggplot
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 16, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_16[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=16.") +
#scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_16)
Extract ancestry coefficients for k=16 change to correct matrix
leak16 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01.snmf/K16/run3/r2_0.01_r3.16.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak16)
## # A tibble: 6 × 16
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.00675 0.0401 0.000100 1.10e-1 1.11e-1 1.00e-4 2.20e-2 5.08e-2 6.08e-2
## 2 0.105 0.0349 0.0467 5.91e-2 1.87e-1 1.00e-4 4.83e-2 1.00e-4 4.28e-2
## 3 0.0000999 0.0000999 0.0000999 9.99e-5 9.99e-5 3.85e-3 9.99e-5 9.99e-5 9.99e-5
## 4 0.00139 0.0000999 0.0145 9.36e-3 3.30e-2 9.99e-5 9.99e-5 4.90e-2 9.99e-5
## 5 0.0228 0.0000999 0.0237 1.43e-2 1.61e-2 9.99e-5 9.99e-5 3.87e-2 9.99e-5
## 6 0.0000999 0.0000999 0.0000999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 7 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 6.75139e-03 4.01424e-02 9.99550e-05 1.10261e-01 1.11171e-01
## 2 1002 OKI 1.05351e-01 3.48736e-02 4.66519e-02 5.91137e-02 1.87114e-01
## 3 1003 OKI 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 1004 OKI 1.39301e-03 9.99190e-05 1.44613e-02 9.36419e-03 3.30173e-02
## 5 1005 OKI 2.27584e-02 9.99190e-05 2.37252e-02 1.42976e-02 1.60814e-02
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
## X6 X7 X8 X9 X10 X11
## 1 9.99550e-05 2.20087e-02 5.07991e-02 6.08446e-02 9.99550e-05 9.99550e-05
## 2 9.99730e-05 4.82618e-02 9.99730e-05 4.28018e-02 9.99730e-05 6.02545e-03
## 3 3.84553e-03 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 9.99190e-05 9.99190e-05 4.89646e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 9.99190e-05 3.86597e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
## X12 X13 X14 X15 X16
## 1 3.53490e-02 0.487606 4.00217e-02 3.45453e-02 9.99550e-05
## 2 1.00045e-02 0.314417 5.70276e-02 3.85740e-02 4.94835e-02
## 3 9.98915e-05 0.993476 1.37964e-03 9.98915e-05 9.98915e-05
## 4 2.66116e-02 0.865289 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 0.881810 1.76843e-03 9.99190e-05 9.99190e-05
## 6 9.98648e-05 0.998502 9.98648e-05 9.98648e-05 9.98648e-05
Rename the columns
# Rename the columns starting from the third one
leak16 <- leak16 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak16)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 6.75139e-03 4.01424e-02 9.99550e-05 1.10261e-01 1.11171e-01
## 2 1002 OKI 1.05351e-01 3.48736e-02 4.66519e-02 5.91137e-02 1.87114e-01
## 3 1003 OKI 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 1004 OKI 1.39301e-03 9.99190e-05 1.44613e-02 9.36419e-03 3.30173e-02
## 5 1005 OKI 2.27584e-02 9.99190e-05 2.37252e-02 1.42976e-02 1.60814e-02
## 6 1006 OKI 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
## v6 v7 v8 v9 v10 v11
## 1 9.99550e-05 2.20087e-02 5.07991e-02 6.08446e-02 9.99550e-05 9.99550e-05
## 2 9.99730e-05 4.82618e-02 9.99730e-05 4.28018e-02 9.99730e-05 6.02545e-03
## 3 3.84553e-03 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05 9.98915e-05
## 4 9.99190e-05 9.99190e-05 4.89646e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 9.99190e-05 3.86597e-02 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05 9.98648e-05
## v12 v13 v14 v15 v16
## 1 3.53490e-02 0.487606 4.00217e-02 3.45453e-02 9.99550e-05
## 2 1.00045e-02 0.314417 5.70276e-02 3.85740e-02 4.94835e-02
## 3 9.98915e-05 0.993476 1.37964e-03 9.98915e-05 9.98915e-05
## 4 2.66116e-02 0.865289 9.99190e-05 9.99190e-05 9.99190e-05
## 5 9.99190e-05 0.881810 1.76843e-03 9.99190e-05 9.99190e-05
## 6 9.98648e-05 0.998502 9.98648e-05 9.98648e-05 9.98648e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak16 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
color_palette_16 <-
c(
"#FF8C1A",
"#B20CC9",
"#F49AC2",
"#1E90FF",
"#B22222",
"purple",
"yellow2",
"orangered",
"magenta",
"#FFFF99",
"#75FAFF",
"blue",
"green4",
"navy",
"green",
"#77DD77"
)
# Generate all potential variable names
all_variables <- paste0("v", 1:16)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_16[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=16.\n LEA inference for k16 with 18,318 SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_16) +
expand_limits(y = c(0, 1.5))
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22642
## column count: 697
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22642
## Character matrix gt cols: 697
## skip: 0
## nrows: 22642
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22642
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS GEL GES GRA GRC GRV
## 10 12 12 10 12 12 12 13 10 12 14 12 12 16 12 2 12 11 10 12
## HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG KAN KAT KER KLP KRA KUN LAM
## 12 4 7 12 4 11 4 5 9 12 2 6 12 11 6 12 4 12 4 9
## MAL MAT OKI PAL POL POP QNC RAR REC ROM ROS SER SEV SIC SLO SOC SON SPB SPC SPM
## 12 12 12 11 2 12 11 12 11 4 11 4 12 9 12 12 3 8 6 5
## SPS SSK STS SUF SUU TAI TIK TIR TRE TUA TUH UTS YUN
## 8 12 12 6 6 7 12 4 12 9 12 12 9
Convert format
##
## - number of detected individuals: 688
## - number of detected loci: 22642
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.geno"
##
## - number of detected individuals: 688
## - number of detected loci: 22642
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.removed file, for more informations.
##
##
## - number of detected individuals: 688
## - number of detected loci: 22642
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.lfmm"
PCA for SNP Set 3 (MAF 1% r2<0.01 snp set)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 688
## -L (number of loci) 22642
## -K (number of principal components) 688
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/
## pca result directory: r2_0.01_b.pca/
## input file: r2_0.01_b.lfmm
## eigenvalue file: r2_0.01_b.eigenvalues
## eigenvector file: r2_0.01_b.eigenvectors
## standard deviation file: r2_0.01_b.sdev
## projection file: r2_0.01_b.projections
## pcaProject file: r2_0.01_b.pcaProject
## number of individuals: 688
## number of loci: 22642
## number of principal components: 688
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 688
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.pca/r2_0.01_b.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8, xlim = c(0, 30))
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
Check R symbols for plot
#to see all shapes -> plot shapes
N = 100; M = 1000
good.shapes = c(1:25,35:38,43,60,62:64)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
## Warning: Removed 640 rows containing missing values or values outside the scale range
## (`geom_point()`).
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 73 Levels: ALD ALU ALV ARM BAR BEN BER BRE BUL CAM CES CHA CRO DES FRS ... YUN
## [1] 73
Check the regions
## [1] "North America" "South America" "Western Europe" "Southern Europe"
## [5] "Eastern Europe" "East Asia" "South Asia" "Southeast Asia"
## [9] "West Africa" "East Africa" "Indian Ocean" "Central Africa"
## [13] "North Africa" "Caribbean"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 ALD 11.8985 3.85889 20.9680 1.880910 -33.6894 -6.76218 -4.533020
## 2 ALD 12.2322 4.18342 22.9889 1.947130 -37.7399 -7.03191 -3.158920
## 3 ALD 12.2253 2.88544 28.1043 1.912940 -28.8482 -8.83110 -6.905940
## 4 ALD 13.1452 3.48505 24.9198 1.685130 -34.7655 -4.18402 -1.369600
## 5 ALD 10.8278 5.66856 19.7776 0.404653 -29.8086 -4.80145 0.773310
## 6 ALD 10.4331 3.11243 24.7304 0.543678 -36.5059 -8.89831 -0.281582
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## 1 8.98646 1.541180 -1.403160 -1.538210 -2.495950 2.47575 0.2189350
## 2 10.44180 -1.248800 -0.310105 -6.370290 2.657040 6.69481 0.0872054
## 3 12.47650 -2.779840 -2.797700 -6.614790 -2.228320 6.91340 -2.2488800
## 4 8.17536 -4.582430 -1.645110 0.443441 3.101420 1.47234 3.4897500
## 5 6.85580 -2.406870 -2.271780 -1.680240 -1.212820 -3.66094 -1.3134100
## 6 9.35066 0.164892 -1.311790 -6.877400 -0.186574 4.76815 -2.8549600
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## 1 -0.7433950 -4.102250 -0.994354 1.553110 -1.304180 4.21418 -2.694100
## 2 -2.8777600 -1.860030 -4.347590 3.031520 -2.212580 8.05775 0.212026
## 3 -0.0648291 -2.618360 -4.343090 3.619210 -5.826280 3.74276 0.939447
## 4 -2.5178000 0.439961 0.672891 -0.396581 -0.544247 -3.22350 -0.838874
## 5 1.4977000 -3.805660 -2.805890 5.297080 3.246310 3.56786 -0.352491
## 6 -2.6450000 -2.608550 -1.146910 2.348040 -1.728110 3.43092 -1.194770
## PC22 PC23 PC24 PC25 PC26 PC27 PC28
## 1 -7.171720 2.801470 -4.56413000 -1.700610 4.766860 -6.759340 -2.940520
## 2 -6.795390 0.934749 -0.98090100 -0.346114 4.846600 -6.659470 -3.202760
## 3 -2.548400 0.555746 -0.00772089 -0.574792 5.259230 -9.362370 -6.661650
## 4 0.861181 -1.694410 0.85624600 3.704020 5.871270 0.743232 0.746248
## 5 -6.540370 7.734690 1.40291000 -4.953990 0.562202 -12.439700 -3.259680
## 6 -5.894430 1.711360 0.80065100 -1.622210 6.891260 -5.290660 -0.827538
## PC29 PC30 PC31 PC32 PC33 PC34 PC35 PC36
## 1 -7.38806 -1.93947 -7.68627 2.212990 -4.07782 5.66995 -2.279700 -1.99917
## 2 -3.39402 -1.81596 -9.67717 -0.447789 -4.24724 4.00922 -0.250749 -3.78005
## 3 -4.38048 -2.27692 -10.22660 -2.834660 -8.62314 2.31297 -2.826010 -2.10932
## 4 -3.02354 -1.68289 4.77615 -1.466760 2.27006 4.55003 1.537870 3.14186
## 5 7.22971 11.39920 4.69439 -2.691630 -11.42350 1.43781 -0.472601 -1.71645
## 6 -9.46076 3.04788 -5.13932 0.808606 -3.42825 3.32513 -1.893570 -2.58513
## PC37 PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 1 -10.99070 2.35826 6.281100 -4.23144 1.23823 0.0577522 6.207560 15.32210
## 2 -7.36251 -1.94576 6.414360 -5.36919 6.66074 -1.9747400 4.776720 8.01157
## 3 -11.83400 -2.60205 5.419670 -6.63548 9.67688 -0.1473240 2.141570 2.20297
## 4 3.67647 -7.31058 7.933170 -1.97308 6.95744 -2.2662100 -0.564778 4.27435
## 5 -9.44355 -1.53910 3.798990 -1.14780 -5.47507 2.2477700 8.635720 8.40626
## 6 -6.63073 12.76110 0.744103 9.13753 -12.24120 7.4629600 -5.337030 10.15090
## PC45 PC46 PC47 PC48 PC49 PC50 PC51
## 1 -0.934501 3.8569000 -5.34155 -9.12502 -11.25280 1.78636 -11.74710
## 2 1.980790 5.0410600 -5.55942 -5.54413 -7.89991 -1.50608 -3.46455
## 3 2.263190 1.1788500 1.20225 1.07838 -3.96479 -5.18767 -1.93441
## 4 -5.385930 6.7931100 1.95438 3.49448 -2.68172 -7.88299 -2.05299
## 5 0.116632 -3.7034200 -7.79331 -1.33218 -1.45567 2.42951 -6.36327
## 6 2.781570 -0.0425489 -11.83510 2.74986 1.03119 -0.89795 -1.41032
## PC52 PC53 PC54 PC55 PC56 PC57 PC58
## 1 2.067140 0.358824 -0.351160 6.45217 -16.561100 -3.41118 -1.277840
## 2 0.545668 0.909372 5.038720 6.27877 -13.832000 -4.87548 2.290730
## 3 2.938710 -7.580600 3.105390 1.23675 -13.720600 -0.57217 0.763471
## 4 1.268040 3.782910 -3.785790 2.78646 -0.279656 1.19117 0.336880
## 5 -0.728252 -1.003880 0.453712 3.13619 -1.738380 -3.28987 -2.215380
## 6 -3.565340 8.202730 6.491160 -3.51222 4.388420 -1.42062 -6.057310
## PC59 PC60 PC61 PC62 PC63 PC64 PC65
## 1 -0.463471 0.725333 -11.947300 -2.912760 -3.3649600 -6.17048 5.40581
## 2 -3.294070 -1.452100 -5.657040 -3.004870 1.4187100 -1.72455 8.78201
## 3 -4.384280 1.215710 -8.646940 -1.688210 0.0826056 -5.34159 7.48674
## 4 11.793800 -7.436210 -0.645052 3.023270 -4.8547100 -2.61409 2.03267
## 5 5.101360 8.419080 -3.246330 0.501329 0.8120250 -7.35079 1.33704
## 6 -9.717840 8.018080 8.968660 1.053380 23.2322000 14.93660 -11.65460
## PC66 PC67 PC68 PC69 PC70 PC71 PC72 PC73
## 1 -0.739923 0.890372 4.09764 -2.77532 -3.97646 -4.582900 -7.003680 -3.86519
## 2 2.870150 -0.129829 10.43760 -4.27367 -2.24687 -10.470700 -4.354890 -4.12743
## 3 1.384970 4.868260 6.14693 -8.26014 2.04405 -10.210400 -5.177980 -4.64743
## 4 -1.105010 2.834160 1.06600 7.31397 -2.80450 -9.040710 -0.128026 -2.03907
## 5 9.029660 0.998050 5.45647 -4.52312 -6.74390 0.459956 0.267002 -3.56658
## 6 -4.128160 -5.135860 -1.84614 11.18930 1.96809 13.454400 -16.504800 13.73320
## PC74 PC75 PC76 PC77 PC78 PC79 PC80
## 1 3.28683 1.205050 -1.113160 -1.938320 4.043630 0.121096 -1.681540
## 2 2.50524 3.168370 -3.061710 -1.441830 -1.249120 4.856710 -4.413080
## 3 6.95597 -0.957647 -3.204570 -4.632810 5.581000 6.026590 2.059280
## 4 -5.26367 7.137910 0.694828 0.473936 -4.461780 6.310620 -0.971351
## 5 -3.34290 1.802250 -2.016620 -4.584130 0.584253 0.868004 7.008150
## 6 3.81356 -14.334100 9.148230 5.380290 6.502990 -19.781300 11.836000
## PC81 PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 1 6.179290 3.742580 -4.35778 6.06019 -5.37454 1.22585 4.69195 4.470680
## 2 1.751120 -1.717100 1.47410 9.95463 -3.84189 -4.28609 4.28630 -0.937928
## 3 0.389775 2.518990 1.93787 12.74170 1.62216 -4.65288 -4.13230 -2.550920
## 4 9.967140 0.243231 3.59179 4.14183 -2.33266 -4.77638 2.50605 -0.123554
## 5 4.165940 0.227164 -2.47538 3.59877 -3.39338 6.02402 7.51161 2.474580
## 6 -4.336970 -12.575000 -4.96907 1.51878 4.12724 -4.20452 -6.99142 2.720720
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## 1 -1.34363 0.118700 0.9844460 -3.8977800 -1.440860 -3.171870 -1.36889
## 2 8.64144 1.893400 -0.0188444 -3.2230900 -0.834429 -2.978750 -2.86645
## 3 7.56701 -0.104621 -2.5311000 -1.4874800 2.528660 -9.868470 -8.62068
## 4 6.04669 4.415650 -2.5067200 1.0201900 5.591420 -8.926990 1.12416
## 5 4.29238 3.584870 -0.4815900 0.0974798 2.470260 -4.932240 -3.70299
## 6 10.87000 -8.036280 5.3530400 -10.7839000 0.459864 0.493365 -4.04304
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 6.93165 -1.3037100 -1.093510 -1.70220 -0.483900 -1.492360 2.40173
## 2 -3.67457 -4.8283400 -3.520380 -3.29133 2.981410 -0.496448 -2.66578
## 3 -8.57851 -9.4991500 4.151930 -2.38174 2.335690 0.901063 -5.87972
## 4 2.01164 -0.0282694 0.854353 -0.24969 -0.954521 4.310620 5.00815
## 5 -1.08490 3.6544700 -5.800560 -10.15810 2.489950 4.266330 -5.09592
## 6 1.31753 -1.5546500 -2.443900 2.37766 0.527705 -0.870260 -1.04518
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 -0.389261 1.286790 -3.80797 -2.26695 6.514070 -1.009370 -2.17540 3.457810
## 2 0.418618 -3.667640 3.40287 -1.25286 -5.356330 1.504250 5.77917 0.186282
## 3 3.277350 6.517350 2.36292 3.33355 1.736070 -2.700070 2.25668 4.306890
## 4 -2.873090 -0.185915 5.52946 -3.48252 -0.367124 -0.559500 6.59497 -8.278920
## 5 1.027970 0.879890 5.40572 -10.98460 -2.640520 0.281598 -5.78467 -4.027110
## 6 -5.646480 -6.122190 -8.47112 -2.55660 3.617870 2.021160 4.64364 12.412100
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 -1.446900 1.70651 -4.894200 -2.38792 5.122720 -4.276130 1.267230
## 2 0.717849 -0.89245 -0.593490 -3.11769 3.662380 -1.304200 -0.724111
## 3 3.134630 -1.72248 0.676184 -6.33110 -0.101371 -4.427790 -2.827220
## 4 11.258200 2.17220 7.036340 5.80008 3.340340 -0.770597 6.637560
## 5 -4.182230 6.40994 -2.749260 -4.25393 3.341230 -0.176298 3.536200
## 6 3.168250 6.47919 -1.530630 5.83320 -1.042500 5.055740 -8.840110
## PC118 PC119 PC120 PC121 PC122 PC123 PC124 PC125
## 1 -1.6685300 -5.32720 1.95853 -4.73370 1.11994 -3.510790 -4.4559100 9.836470
## 2 0.0492695 3.31784 -2.97760 -3.64527 1.25323 0.341979 -2.5663600 3.728240
## 3 0.1905510 5.23757 -1.28177 -5.22730 -7.47138 -5.865210 0.0191948 6.365040
## 4 -1.6458000 4.00927 13.41000 -3.03806 2.83135 1.046140 -3.7309300 -8.959210
## 5 1.6369400 -6.29942 -1.28463 -3.73613 -9.37480 12.623800 -4.9531700 -2.636770
## 6 -7.6600000 4.91161 1.88427 5.14815 3.24284 4.011890 -2.8030300 -0.942569
## PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 1 3.781900 1.183500 3.079410 -1.237430 -4.000670 5.461280 7.111460
## 2 5.924740 0.209126 5.628260 -7.010920 0.750376 -0.320736 -1.211470
## 3 3.593720 -3.151720 7.903910 -3.167900 4.254680 -4.720320 -0.379222
## 4 1.947630 -1.728130 -8.076620 -1.096440 -8.423640 4.872620 1.470880
## 5 -0.825903 -2.270400 -0.464037 0.900414 -0.346241 4.837860 0.724787
## 6 4.431420 -1.218760 1.719750 -5.397170 -2.682860 0.760937 0.784101
## PC133 PC134 PC135 PC136 PC137 PC138 PC139 PC140
## 1 -2.378120 1.687690 2.673780 4.04975 -0.145723 -3.46940 -8.072330 -0.611995
## 2 -0.583977 0.995366 0.278118 3.57282 -0.822583 -0.99227 -2.947450 0.852558
## 3 0.763785 3.016570 3.195090 4.71458 -7.164000 -3.95266 -2.295240 -0.183394
## 4 2.799490 0.135762 9.490730 3.58000 5.946930 5.79806 -3.517150 -0.600069
## 5 3.567450 -13.004800 4.827770 -1.05043 -0.542921 6.19495 0.825143 0.472428
## 6 -7.777590 -7.670570 1.062020 -1.22783 2.489490 -3.46471 -5.596910 1.897120
## PC141 PC142 PC143 PC144 PC145 PC146 PC147
## 1 -0.997202 -5.236530 -0.870772 -9.329990 5.277620 0.264430 -2.14896
## 2 -8.905070 -2.311090 1.837050 3.201020 5.881510 -3.452580 5.51274
## 3 -8.576190 -5.757500 12.182000 -0.539536 7.198210 -4.657740 8.32013
## 4 -3.239030 -1.043220 3.544470 -0.830649 0.498468 -0.213647 3.50364
## 5 -1.520360 0.582979 -0.701247 1.623020 6.551760 5.747600 -11.01850
## 6 2.936710 1.779840 -3.898050 -7.489730 -0.700444 -3.800990 5.85017
## PC148 PC149 PC150 PC151 PC152 PC153 PC154
## 1 0.313684 -7.849820 -3.866890 4.655750 -2.564390 1.627360 -3.9310000
## 2 1.709620 0.613923 2.784340 3.309270 -4.325070 0.419344 -6.3652400
## 3 6.773820 3.983930 3.300730 0.210383 -3.214640 4.895100 -1.8233800
## 4 4.757610 5.654400 4.296540 -0.827081 -3.770600 7.321870 0.0186542
## 5 -4.566200 -2.901290 0.600091 2.021960 6.230730 -3.753150 3.7881900
## 6 4.899100 -2.498930 -9.219470 1.232410 0.962651 -8.125360 0.5357390
## PC155 PC156 PC157 PC158 PC159 PC160 PC161 PC162
## 1 3.13810 -1.46355 -3.013630 -6.622600 4.31151 2.046280 2.416730 -4.229680
## 2 -2.84686 -4.29443 -0.734930 -2.664550 9.99006 -1.788660 -0.388667 2.097670
## 3 -3.06354 1.15015 1.453980 9.361540 4.96861 -7.196140 5.069530 0.666307
## 4 4.18548 0.73210 -1.040920 -0.409835 -8.12447 -1.700210 -0.505560 -2.381160
## 5 -4.96705 -5.56457 3.627220 -1.292500 4.38248 4.740250 12.842600 9.023710
## 6 -2.77699 -2.17923 0.531267 -5.878390 -5.08949 0.238323 -1.981010 -0.318513
## PC163 PC164 PC165 PC166 PC167 PC168 PC169 PC170
## 1 -0.977354 -1.57235 3.576590 -3.04232 -3.39439 -0.963023 6.37370 2.38532
## 2 -3.551370 -1.75907 5.701560 -5.98100 -6.55855 6.582750 1.76178 -1.73615
## 3 -3.243210 -2.37397 7.271910 -3.47200 4.19099 -3.045100 1.47464 -6.04968
## 4 4.256880 6.37976 -8.568080 6.55969 -6.54069 -6.252820 5.29830 1.80133
## 5 0.400542 -3.65314 0.254957 2.00497 -1.70544 -0.565616 -1.28996 6.96619
## 6 -4.076200 -1.91306 -5.453440 4.05089 3.90265 -1.436990 -2.79383 5.89461
## PC171 PC172 PC173 PC174 PC175 PC176 PC177
## 1 -0.141260 -1.753580 -3.201160 -4.371390 -2.918110 -3.97661 -0.0998708
## 2 -8.414750 -0.693887 3.124380 1.726010 0.771151 -5.48584 -3.2261900
## 3 -4.390430 -2.527000 1.202060 5.838940 -2.815140 -1.78273 4.4973900
## 4 -5.234380 -3.848410 -3.252540 -2.116090 -1.929810 -4.26861 -2.6883700
## 5 1.294890 -8.476360 -0.429583 -4.852190 -3.054290 6.06653 -2.7902400
## 6 0.357824 -0.787292 2.563700 -0.379031 2.943810 6.72160 -3.6346900
## PC178 PC179 PC180 PC181 PC182 PC183 PC184 PC185
## 1 1.11397 2.7705000 -6.41632 -0.718047 5.40606 2.768710 -0.157675 -5.296250
## 2 -2.04173 -0.1632810 -3.30494 3.162210 -1.66111 2.798700 2.504030 -2.528250
## 3 -1.59255 2.1579500 -2.50266 -8.285570 -6.36530 -0.365336 7.896620 6.849820
## 4 8.76007 -0.0967904 5.35715 5.651510 3.03202 8.365110 -1.639480 -6.466690
## 5 -2.19353 4.2507800 4.67608 -4.351820 -9.37398 -9.092460 -2.733110 -0.165382
## 6 3.75966 1.2323200 -3.29620 -2.309620 2.50132 0.352197 -1.674230 -3.168450
## PC186 PC187 PC188 PC189 PC190 PC191 PC192 PC193
## 1 -5.29530 -1.32898 4.422620 -4.347050 -1.17324 5.215930 7.408460 -1.102280
## 2 -8.42934 -1.90825 -4.061890 2.266310 -3.85701 0.404375 0.915842 -5.226510
## 3 -1.17040 2.62380 -2.048570 -0.624122 2.77890 -2.452040 -3.036270 -4.019250
## 4 4.52913 -11.06170 0.650079 -5.651790 6.30209 8.814130 6.935000 5.774390
## 5 -1.27104 -8.09140 0.322796 -4.247770 -7.71188 -5.393390 2.957960 -0.873157
## 6 -4.10243 2.79037 3.951170 -3.347500 -2.11222 -2.412620 -0.794744 -0.164616
## PC194 PC195 PC196 PC197 PC198 PC199 PC200 PC201
## 1 5.6279800 -8.54265 -4.436000 6.32859 -2.01467 -2.518600 -4.600950 -0.305185
## 2 -0.0337481 2.07801 -3.018280 1.35562 5.01774 0.498107 3.113090 4.860630
## 3 -3.6605100 -1.75431 2.724360 -2.22400 6.42381 0.175597 0.512386 -0.780544
## 4 3.5217200 -3.43567 0.648697 2.14305 -2.21421 3.045700 4.651460 -7.773500
## 5 -4.1743400 2.78024 2.247300 -2.46370 6.27945 -0.584857 -7.693870 -2.855090
## 6 0.0126222 -2.12899 3.167800 -1.31376 -1.27225 5.049300 3.290740 -1.016910
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## 1 0.619995 0.511851 -3.566140 6.564960 1.05245 2.95804 1.171880
## 2 -2.242940 2.427600 -0.462441 5.246700 4.93263 2.08983 -4.565700
## 3 -3.012950 -0.943221 4.925950 2.369390 -3.13122 2.86687 -0.600310
## 4 -5.168800 -0.166476 -3.775910 0.124918 5.39915 -2.07519 -0.379023
## 5 -3.661670 5.837700 0.821897 5.528850 -1.26431 3.37289 -1.628840
## 6 -4.271770 1.435020 -0.816374 -0.938444 5.35043 2.90376 1.994700
## PC209 PC210 PC211 PC212 PC213 PC214 PC215
## 1 3.0637500 0.326561 -7.263850 -3.866530 7.072110 -2.182620 4.69728
## 2 1.3800900 1.887270 0.583088 -0.649341 1.104540 2.639440 2.46155
## 3 -2.5983200 0.691781 0.876536 4.073340 1.429400 -1.384510 1.80725
## 4 -6.3484000 0.952962 6.126060 -1.116160 -0.687042 -6.839030 -1.59933
## 5 -1.8068900 7.318490 9.855260 -9.872910 -2.322490 -6.641690 -5.07957
## 6 0.0800038 -1.941610 1.521390 -2.767840 -0.987760 -0.378819 4.29534
## PC216 PC217 PC218 PC219 PC220 PC221 PC222 PC223
## 1 -2.022560 4.51343 0.20998900 1.653830 8.45475 -3.32061 -0.544521 2.89980
## 2 -0.339576 5.05123 1.97419000 0.887665 -2.10645 2.86436 1.676340 8.68816
## 3 3.648840 2.84461 -0.00618843 0.225685 -2.87660 -3.37039 0.926413 -3.81371
## 4 -4.000240 6.52331 0.73299700 -1.356560 4.28038 5.32275 -0.711682 -1.93726
## 5 -7.584570 9.90239 -8.13707000 6.849920 -1.41234 2.28737 5.204920 -8.19837
## 6 6.970830 -1.15739 -2.42506000 -2.768490 -1.98175 1.06500 -2.492900 3.80023
## PC224 PC225 PC226 PC227 PC228 PC229 PC230
## 1 1.832910 2.859500 1.6443100 -6.19263000 -7.130470 4.446880 -0.186256
## 2 3.509170 0.689052 -2.5059900 -0.71940900 -1.395450 -6.097530 3.278390
## 3 1.137120 -0.771971 0.0507143 -0.00445608 -1.598960 -1.434190 4.461100
## 4 -3.475090 1.261260 -5.2605700 0.21031600 5.988410 -3.607710 -3.866150
## 5 0.680956 1.012670 4.2677300 7.47408000 7.435590 -4.036900 4.173860
## 6 1.733650 -1.209130 -1.8056300 5.76595000 -0.292876 -0.774136 3.131270
## PC231 PC232 PC233 PC234 PC235 PC236 PC237
## 1 -8.190690 -1.969220 -5.58511 2.3334900 0.914007 -3.948490 0.39069200
## 2 -2.158590 3.244850 -4.33197 4.2369200 -2.032430 0.388047 -3.91907000
## 3 2.374080 2.620280 2.32979 -2.9930200 0.118947 3.863260 5.97636000
## 4 -0.939006 4.875300 -4.73733 -8.7861700 -0.969056 -6.273040 -7.33774000
## 5 2.218690 -7.296970 -1.19657 0.3920730 4.239490 4.259520 0.00123725
## 6 4.212630 0.198037 5.16208 0.0533057 -0.412084 -3.455080 -5.43434000
## PC238 PC239 PC240 PC241 PC242 PC243 PC244
## 1 -0.786844 0.7157320 2.112520 2.758120 3.183580 5.79723 0.121639
## 2 -0.676934 -3.7479800 -0.892240 0.625494 3.260310 7.39515 5.013700
## 3 5.734090 0.0970912 1.194330 -0.948146 0.150667 -2.08495 -3.908600
## 4 -1.221010 -1.2565900 -1.376880 -3.889080 -2.109230 5.72671 4.772850
## 5 -2.826820 2.7278800 3.535780 -0.662025 -4.142770 2.35954 3.869920
## 6 -3.027610 3.1014900 -0.977669 1.698270 3.128530 1.90610 -2.604320
## PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 1.5072500 5.00468 -3.50096 -0.104169 -5.228660 2.473540 -1.448730
## 2 -0.0311376 5.11494 8.98308 1.450940 3.224880 1.807720 -1.712260
## 3 1.2898800 -1.01829 1.40151 1.782440 0.469905 -6.430050 -6.003360
## 4 -3.4588500 -5.38979 -4.22454 -0.188380 -2.659040 6.129860 -2.268780
## 5 -2.1593500 2.57015 -8.55004 4.475860 2.570250 1.161970 5.016230
## 6 2.9842100 1.04132 -1.38910 -1.011830 1.720750 0.744594 -0.902192
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 -1.298770 -1.931920 -1.1629200 0.339418 7.094020 -1.279290 3.684530
## 2 -4.382710 0.351900 0.0794999 -2.590000 -6.585020 -0.154801 -0.762867
## 3 1.250490 -1.244750 -1.9325400 0.193598 -2.273380 1.204290 0.384443
## 4 4.427770 -0.509626 -5.2930400 5.100560 -0.822354 2.722500 2.415860
## 5 -3.527460 -1.120240 -0.4678880 0.698454 4.737260 -4.762300 0.586104
## 6 0.219882 2.287450 -1.0379600 2.254390 -2.952640 -2.717000 1.096950
## PC259 PC260 PC261 PC262 PC263 PC264 PC265
## 1 3.349670 1.78552 0.36825300 1.460990 3.061050 -1.322840 -1.104860
## 2 -0.744912 2.41334 -4.10532000 -2.852850 0.048269 0.925379 -1.157720
## 3 1.726140 -3.28961 4.01317000 -1.463210 3.902070 3.992470 -2.276590
## 4 -3.011900 3.14689 -0.00388165 2.195830 -4.845990 -6.451570 -2.239920
## 5 -0.649155 -1.26044 -5.64032000 -0.444088 0.235939 2.850170 -0.175531
## 6 0.554808 -4.13210 1.95626000 -1.652790 0.373481 0.931666 -0.736384
## PC266 PC267 PC268 PC269 PC270 PC271 PC272
## 1 0.668185 6.702660 0.660727 0.175709 0.283383 7.254500 -4.152170
## 2 -3.503440 -0.348669 2.387070 3.154040 1.737000 -0.168999 5.013310
## 3 3.448800 -0.903781 3.426960 -4.229630 3.132950 -0.174052 0.108654
## 4 -5.209450 0.015039 0.894539 -0.723044 -1.445870 -0.882903 2.763000
## 5 -2.044720 -3.073900 4.607310 1.090490 -1.984880 -0.529451 -3.132650
## 6 -3.013410 -3.695040 -1.634880 0.171090 0.238332 -1.163630 -0.196222
## PC273 PC274 PC275 PC276 PC277 PC278 PC279 PC280
## 1 1.677970 7.81944 2.110250 -1.567860 2.952540 2.552990 5.61472 0.629756
## 2 7.060520 2.41947 8.498130 -0.146736 0.985365 -2.400750 5.79757 1.194720
## 3 0.432424 -2.51915 -1.744860 -0.786598 -1.582370 1.642210 -1.19837 -0.682976
## 4 1.304410 7.66498 -2.791560 -2.927290 -1.100930 -4.489640 -6.48909 1.840430
## 5 -1.513140 -3.17431 2.320440 5.630840 1.104180 -0.339380 1.65247 1.201840
## 6 -2.368800 -1.69213 -0.798591 -2.842390 -0.233671 0.846307 -1.25258 0.144443
## PC281 PC282 PC283 PC284 PC285 PC286 PC287
## 1 0.692712 -3.488640 -2.001480 -7.559070 2.66131 1.510070 11.325000
## 2 -6.646920 -2.524960 0.964793 3.108780 6.42244 -4.000590 3.753810
## 3 -3.944820 -2.361430 -1.264450 -0.569989 -9.26104 0.140117 -4.610780
## 4 -0.748424 -0.698996 -0.320397 3.709160 6.70639 0.850139 0.753095
## 5 -0.183655 -0.374259 2.314610 0.549637 5.98890 3.744880 -0.776613
## 6 0.906798 -2.586710 -0.804821 -2.208050 -1.02308 -3.128990 3.222870
## PC288 PC289 PC290 PC291 PC292 PC293 PC294
## 1 2.404590 -0.748032 -4.7580400 -1.147350 -0.524219 0.250731 -8.51649
## 2 -5.474040 3.367370 -0.5142120 0.026877 -0.619392 3.233640 1.62955
## 3 1.402480 -2.427300 5.4351900 3.741020 0.863480 5.053610 3.72361
## 4 -1.409670 -4.040910 -2.7730500 -0.744712 1.062200 4.419110 4.22575
## 5 4.027320 2.458220 -3.7326400 -5.947370 0.588445 3.251560 -5.64496
## 6 -0.261222 -1.457330 0.0563136 -3.039090 3.337200 2.624540 1.16674
## PC295 PC296 PC297 PC298 PC299 PC300 PC301
## 1 -3.407200 1.82864 4.358320 3.67117 4.708780 3.9489900 -0.65519
## 2 -7.020300 -1.22006 -1.062970 -5.83735 -0.806433 -1.6187600 -5.00769
## 3 -4.137140 -3.70534 -1.662280 -1.85428 0.792453 -0.0375975 1.28279
## 4 -0.514099 -4.00937 0.995828 1.27443 2.837610 4.4402600 3.95943
## 5 2.241220 9.60890 2.787100 -1.66928 0.782261 6.8586600 2.41582
## 6 0.198664 1.20530 2.248890 -2.76466 -3.363880 0.2810570 0.50078
## PC302 PC303 PC304 PC305 PC306 PC307 PC308
## 1 0.9491320 -2.54439000 2.94930 0.185465 1.657670 -3.038740 -3.515970
## 2 0.1957950 1.65509000 -1.86220 1.583750 -0.622924 -2.944350 3.634120
## 3 -1.8934600 1.67825000 1.83357 1.307230 -1.681070 0.615403 3.634690
## 4 -2.0623100 0.00725161 -7.64866 1.639910 2.337770 4.502890 -2.156110
## 5 -1.6569400 -0.50527200 -2.96218 -2.025100 7.262400 -3.084190 -2.560640
## 6 -0.0334848 0.40520000 1.69610 1.075850 1.985610 -2.159800 -0.263277
## PC309 PC310 PC311 PC312 PC313 PC314 PC315
## 1 4.4662600 -7.220310 4.0035500 4.84828 -2.047380 6.459350 2.0530200
## 2 -2.6696100 -2.706070 0.0953163 -8.36116 5.867400 -0.686917 -3.6883100
## 3 -0.1056200 0.740668 -1.1779200 5.50363 -2.451050 1.676270 -2.4761100
## 4 -0.7031060 -2.958080 3.2981200 -2.76237 -4.030790 -4.244030 0.0271444
## 5 -0.2799270 -1.455730 -2.5699200 -0.30904 -1.524430 -2.266380 5.5629200
## 6 -0.0742412 2.285170 -2.0177500 -1.33069 0.114855 -0.937781 0.1677240
## PC316 PC317 PC318 PC319 PC320 PC321 PC322
## 1 1.60793 -0.928904 -2.8963400 -3.8314300 3.322600 -5.91347 6.957620
## 2 3.40482 -5.136700 -1.5446300 0.0540386 1.899880 -1.29758 -0.866601
## 3 -1.37351 4.842630 -3.3204600 -0.8597600 -4.035920 2.64319 3.593210
## 4 -1.07955 -2.844890 -0.0536269 -0.4826110 -0.696679 -4.43431 0.534557
## 5 2.17793 -1.818010 -3.1465600 1.0611100 4.082690 -1.00184 -2.522460
## 6 -1.25075 0.276142 -0.1193020 0.8655870 0.432312 -1.31602 0.584220
## PC323 PC324 PC325 PC326 PC327 PC328 PC329
## 1 -2.261220 -1.946790 -1.06225 -2.423140 6.433680 -9.1127800 -3.79751
## 2 1.858560 -0.443009 3.72992 -0.435356 2.911360 -2.9544200 1.53054
## 3 -2.008980 0.507728 0.34785 -2.946460 2.778320 -0.0304957 0.68032
## 4 -3.039660 -1.733570 -5.98391 -4.679300 0.176414 2.8618600 -2.10652
## 5 -1.942430 2.054440 -1.89354 -1.035090 2.462550 6.1113600 1.38743
## 6 -0.584371 0.297172 -2.25775 1.915640 -1.834600 0.7316520 -1.10027
## PC330 PC331 PC332 PC333 PC334 PC335 PC336
## 1 -0.6697460 2.335020 -9.02480 7.0954300 2.481280 1.416810 5.1544100
## 2 4.9301800 1.395380 -1.58689 -9.4036500 -1.351530 5.762420 0.0203044
## 3 -11.0183000 3.702350 3.29380 4.2603200 1.536270 -0.779746 5.4327500
## 4 10.3303000 -0.949576 1.66993 -1.5769700 0.917715 -1.427320 -4.7079600
## 5 5.2508600 -4.016490 3.35820 -0.0464337 1.840960 -1.538450 -3.0274400
## 6 0.0381149 2.240540 2.30567 1.0953200 1.511930 1.653160 -4.3168700
## PC337 PC338 PC339 PC340 PC341 PC342 PC343
## 1 0.595132 -2.10366 1.533210 0.4307060 1.221380 1.715490 -4.3898300
## 2 5.754790 1.85071 -6.060160 -4.0798600 -1.418390 0.912188 -0.6976580
## 3 5.779580 5.84280 1.646920 -0.0883349 0.473957 -3.012210 -4.8430400
## 4 -1.015270 -3.17295 0.647781 2.6516200 0.290116 -3.995180 -1.0811500
## 5 -0.468120 -7.29827 3.243030 3.6592200 -4.561820 7.072780 5.8585300
## 6 -1.614560 1.47322 -3.687260 1.6110900 3.520520 0.759446 -0.0980159
## PC344 PC345 PC346 PC347 PC348 PC349 PC350
## 1 -0.6908760 -2.248950 1.599820 -1.144390 4.664490 -0.390483 5.10194
## 2 0.0941007 -1.346530 -2.941950 -1.900770 -2.139460 1.842290 -0.59382
## 3 0.1265370 0.708169 -3.135280 -1.683880 -1.383580 -1.425310 5.37760
## 4 -1.5620700 0.618369 -0.473878 -0.779087 2.097720 -7.439810 -8.00980
## 5 -3.3422600 0.816991 -3.204700 6.875240 5.791930 1.900770 -5.86972
## 6 -2.7558500 3.541620 -1.532780 1.516360 -0.295384 -1.507070 1.02258
## PC351 PC352 PC353 PC354 PC355 PC356 PC357 PC358
## 1 -0.185552 -0.782399 2.06013 -1.428750 -4.58035 -2.8537900 -3.39104 5.70659
## 2 -2.909170 0.293619 6.19210 3.683840 5.92079 -6.5299700 -8.58787 -3.98967
## 3 3.892560 -5.713910 -1.95532 -4.528570 1.74027 0.4775880 3.58162 -4.22597
## 4 1.868760 -4.097560 -2.44472 1.130900 2.71606 -5.6525500 -4.07519 3.07901
## 5 -0.088897 1.599600 1.58747 0.302451 -2.95658 3.5443400 6.55612 1.75672
## 6 -1.927820 -1.549190 -3.31106 -1.106010 0.18390 -0.0697238 3.15082 -1.25369
## PC359 PC360 PC361 PC362 PC363 PC364 PC365 PC366
## 1 -1.19079 3.792470 2.29117 -5.15170 5.281940 8.598590 4.469220 -1.6559300
## 2 -5.87414 0.797655 3.94421 -5.00693 2.852630 -0.487419 -4.192020 -2.1086800
## 3 3.32671 1.534180 0.44207 3.84969 -0.932832 -7.460330 6.197250 0.0573447
## 4 -6.81197 -0.991286 1.18064 1.86411 5.780690 4.180990 -3.234230 1.4324400
## 5 12.19500 2.255990 2.33111 6.89192 0.771399 2.849610 -2.229590 3.2951100
## 6 -2.54109 1.357850 2.33792 1.20592 0.136482 -1.206020 0.220469 -0.5791840
## PC367 PC368 PC369 PC370 PC371 PC372 PC373
## 1 4.560030 -2.963340 3.28228 8.800210 -6.6371000 -1.743110 -3.347280
## 2 2.604130 5.550430 3.22494 4.077540 7.5722000 -7.964400 -6.674970
## 3 -1.603570 2.750110 -2.21031 1.880560 -1.5570200 3.056490 1.743550
## 4 -0.664225 0.398964 -3.67413 1.844910 2.7492600 2.967250 1.157680
## 5 3.709460 1.158250 -6.95520 0.381659 0.3821940 1.416710 -0.541871
## 6 -2.640120 -0.116021 -1.27714 -1.819890 0.0230128 0.848949 -3.660630
## PC374 PC375 PC376 PC377 PC378 PC379 PC380
## 1 4.666990 3.093120 0.147127 -5.417010 0.860193 -0.477035 0.304496
## 2 -0.902418 0.703398 1.904310 3.267530 0.169598 -0.765301 5.625600
## 3 4.135700 -5.816000 -4.018550 -0.320740 1.950950 4.306870 0.978982
## 4 -2.588750 -0.293278 1.692370 0.472464 -2.868220 2.649880 -2.855800
## 5 -0.241666 6.016770 2.677910 -3.250850 3.940460 4.165690 -6.079370
## 6 -4.742270 -0.464228 -1.374020 0.798703 -2.586160 -1.565320 1.618960
## PC381 PC382 PC383 PC384 PC385 PC386 PC387
## 1 2.124450 -0.882423 0.609469 -0.592757 -4.49149 0.610937 1.650770
## 2 -5.022800 -6.476660 -0.841114 0.620252 -5.74996 -1.856400 4.229190
## 3 0.244552 -2.306170 2.842770 -0.376082 4.20329 -3.561100 2.199490
## 4 3.353390 -6.495190 -1.600340 3.912110 -2.87357 5.196310 1.350250
## 5 2.726300 4.342850 -5.076050 3.030210 3.33195 7.579190 -1.662490
## 6 1.859110 -2.037550 0.705680 1.421200 1.46382 -1.939330 -0.520467
## PC388 PC389 PC390 PC391 PC392 PC393 PC394
## 1 -3.065200 8.044350 -0.640716 5.392230 2.793180 2.56680 3.32342
## 2 3.128960 -0.731143 0.628872 -0.706485 0.870649 7.56201 2.95395
## 3 0.488453 1.868410 -4.262500 3.876070 -3.609700 -3.45799 -3.38279
## 4 -4.697910 -1.395930 2.831990 0.761245 1.596570 -0.53367 -6.84325
## 5 2.026030 1.841950 -2.373110 -4.806120 -6.442250 6.03059 2.24633
## 6 0.451781 0.754478 0.325290 -2.018220 0.228110 2.04450 -2.36051
## PC395 PC396 PC397 PC398 PC399 PC400 PC401
## 1 6.8458400 2.601220 -3.902850 -4.784300 -1.103490 -2.34627 5.813530
## 2 2.2776900 1.800140 1.737280 2.343510 2.579260 -3.18111 -0.250946
## 3 2.3516900 -4.627460 4.260240 -1.274330 2.103960 2.20815 -3.583430
## 4 -4.6978800 -3.526580 0.563934 -1.589780 4.390240 2.72954 2.931680
## 5 -0.0425012 2.741770 7.498050 -0.678743 -4.734170 3.42736 -3.508500
## 6 1.1488100 -0.793504 0.334757 0.321808 -0.719136 1.21465 -2.098070
## PC402 PC403 PC404 PC405 PC406 PC407 PC408
## 1 2.614000 1.5340100 0.5677840 2.042300 -2.877800 5.413290 -0.460487
## 2 -3.791240 0.0993136 -0.7526180 3.235600 -2.885100 3.101630 2.555550
## 3 -2.865430 2.7200200 3.9167600 -0.601597 -0.959362 5.246560 -2.492140
## 4 0.802927 -0.1342930 -0.0590634 -0.477474 10.004000 -1.160220 0.965827
## 5 3.129410 -4.4339300 2.9766200 3.267150 -0.242847 -0.985181 -1.252730
## 6 1.534780 1.6311600 -2.2387000 0.189547 -0.935675 2.900690 -1.135740
## PC409 PC410 PC411 PC412 PC413 PC414 PC415
## 1 5.777350 0.186193 -3.697420 1.053840 3.70827 -5.480940 1.928650
## 2 0.814351 -8.592350 -4.871200 -0.858377 -7.73860 1.948890 -2.902160
## 3 -0.948669 2.233200 0.256813 6.531920 -2.19023 -0.666166 6.522690
## 4 4.144810 6.588860 -1.085550 -1.382170 -1.37113 -0.703932 0.413409
## 5 -7.554420 8.089660 0.699263 7.572820 -2.09022 -0.919366 2.917860
## 6 3.467400 1.091660 0.038636 -2.278600 0.25542 2.264440 -0.663749
## PC416 PC417 PC418 PC419 PC420 PC421 PC422
## 1 -0.960798 0.534835 -1.627320 7.447220 6.952380 -9.5273000 -6.048750
## 2 5.853670 -1.761720 -0.360365 -8.859380 3.452240 0.0801953 -2.015720
## 3 -2.638750 1.807090 2.227800 0.517441 1.582060 -1.3469500 -0.587567
## 4 7.248530 -4.305390 -3.785030 -1.855380 -4.860160 -1.4193400 4.766900
## 5 6.631460 -1.010210 1.917000 4.946750 2.139460 5.3060900 1.107250
## 6 0.218753 -0.605274 0.706693 -1.697190 0.854723 -2.0809100 -1.260280
## PC423 PC424 PC425 PC426 PC427 PC428 PC429
## 1 -2.4164200 -4.19649 -4.862450 -2.59090 -0.718711 -2.003220 0.149758
## 2 -0.0309085 2.86232 -4.261170 7.32878 -4.533250 6.446920 -0.684153
## 3 4.0501300 4.85166 5.264650 -3.99502 1.338970 -0.254161 0.777067
## 4 0.1984860 1.55305 -4.039200 10.39760 1.798110 -3.909950 -4.878780
## 5 2.5580600 -4.82430 0.581627 -2.12373 7.219020 3.301050 -0.278043
## 6 -0.5154770 1.14752 3.015220 -2.58170 -1.167980 0.752221 -2.054580
## PC430 PC431 PC432 PC433 PC434 PC435 PC436 PC437
## 1 6.903710 -3.23594 -1.899440 1.755130 -1.009700 1.032420 0.605694 -5.14891
## 2 0.111747 4.16981 -0.429687 -0.896585 -1.734790 -2.926040 5.170750 -2.07149
## 3 3.422970 2.07135 2.496960 -7.949870 3.549730 -3.927070 0.626145 -3.80836
## 4 0.155235 -3.79257 -2.710380 9.570040 6.689530 0.696102 2.520570 4.68329
## 5 -3.017840 -1.67729 -2.455800 4.004330 -3.256930 2.615680 -2.162880 9.54156
## 6 -2.734310 0.76756 0.593651 1.942550 0.971759 0.034676 1.244580 3.85916
## PC438 PC439 PC440 PC441 PC442 PC443 PC444 PC445
## 1 5.47715 -0.333951 -0.710384 -0.87455 -1.069980 -1.108100 -2.04437 1.68954
## 2 3.52655 -4.197790 -5.272720 -2.01565 7.289110 -0.748792 2.66491 -7.08296
## 3 -2.28515 3.220360 2.532970 -0.24209 0.435932 -1.061440 -3.83419 -2.07934
## 4 -8.25464 -3.819230 -7.421120 -1.55453 1.797960 5.747530 -7.55522 1.54181
## 5 -6.22470 4.017720 4.745770 -5.10651 -2.776900 -1.659780 -2.71868 -1.31831
## 6 -1.78598 0.993170 -0.560315 1.57821 -2.271400 0.712352 -1.80377 -3.87990
## PC446 PC447 PC448 PC449 PC450 PC451 PC452
## 1 1.609370 5.777400 -2.78657 -4.841920 6.575910 1.955260 3.291230
## 2 -3.878400 1.257500 -2.74408 -1.613860 1.263820 -5.885220 4.668970
## 3 -2.621180 2.010490 2.48831 3.940340 -5.675830 -3.944010 -0.403480
## 4 -0.298918 3.403740 -1.93151 -1.045810 -4.763210 1.184080 0.768264
## 5 2.381300 4.334380 2.00908 3.423350 1.921050 0.619247 1.741630
## 6 1.689620 -0.944392 2.00521 0.797348 0.386529 -2.287020 -2.655610
## PC453 PC454 PC455 PC456 PC457 PC458 PC459
## 1 2.740960 2.007130 -7.856160 -3.908200 -14.401300 -2.2873200 0.458626
## 2 -2.301980 2.597060 -3.995680 6.332570 8.113390 -8.6148000 -3.382040
## 3 -4.533070 3.036120 -3.583590 -2.309730 1.026400 0.7367130 5.642500
## 4 0.761276 1.089780 -7.264330 -5.383180 0.543375 0.0216743 3.115990
## 5 3.224010 -6.451810 13.767100 -3.217300 -0.284549 -6.8200900 -1.865860
## 6 -0.037085 0.915636 -0.770887 0.343602 2.740980 -0.3263570 0.144887
## PC460 PC461 PC462 PC463 PC464 PC465 PC466
## 1 -5.8307300 0.335195 0.8371280 -3.947950 9.456580 1.378480 -3.283470
## 2 3.0782400 5.823550 -1.5176700 -6.812190 -5.074650 0.193145 -2.545930
## 3 -5.6526700 2.398080 0.2947330 -0.635040 3.685810 10.201300 -1.725050
## 4 -1.2320100 3.640730 0.0720409 4.277130 0.140552 -0.347928 -1.591960
## 5 -1.1100300 -1.169190 -5.0132400 -0.817523 -4.903980 -1.229360 -5.018170
## 6 0.0445979 4.160530 -0.5825320 0.173514 -1.391120 3.110940 -0.340302
## PC467 PC468 PC469 PC470 PC471 PC472 PC473
## 1 11.259500 -6.6065100 -2.41028 -5.17415 4.767700 -2.066910 1.625840
## 2 -3.045640 -0.5041950 3.04394 4.78015 -0.793591 1.275440 -5.215130
## 3 2.103520 6.2128000 -1.38180 1.59864 -4.149680 -0.985679 6.012580
## 4 -4.241820 -1.0168000 -5.00685 -4.47607 2.270440 -3.317540 3.226550
## 5 -0.539145 0.0830878 10.16960 -2.43238 -2.670970 -1.868420 -1.636530
## 6 -0.618192 -1.5144400 1.11818 -0.65934 -1.397300 -3.780960 0.571235
## PC474 PC475 PC476 PC477 PC478 PC479 PC480
## 1 -3.716030 -0.904873 -3.5439600 1.192140 -0.0160798 0.9148390 -3.6783700
## 2 0.231343 -0.436475 4.7219900 -0.598404 3.6480200 8.5704700 -3.8117800
## 3 1.686770 -2.846930 -4.1807800 2.574220 -3.3559800 -3.0842300 -0.0782256
## 4 4.589050 -4.128190 -2.6962900 3.385050 -1.0951000 -2.2036300 2.3830800
## 5 1.155690 -5.976330 3.2007000 -1.321310 3.5089600 5.4931300 2.1447300
## 6 0.677919 1.605820 -0.0390345 3.205160 0.5444620 -0.0604682 -2.6974100
## PC481 PC482 PC483 PC484 PC485 PC486 PC487
## 1 0.0416064 -2.455510 -3.726990 2.021170 3.216720 -0.738357 -6.83587
## 2 -3.1901300 -2.004330 -3.926620 1.385990 -2.052160 7.608510 1.93646
## 3 1.2533500 0.710538 -0.487847 -4.668710 4.968950 0.191199 3.39163
## 4 -1.5768300 -8.989680 -1.225960 2.243270 -1.436910 -2.304290 1.72469
## 5 0.9552680 0.871639 1.920940 -0.739996 -0.858372 0.974944 -2.97047
## 6 -0.4576980 -0.602847 -1.728920 2.415730 2.118060 0.207212 1.83863
## PC488 PC489 PC490 PC491 PC492 PC493 PC494
## 1 3.88737 4.943570 -3.893890 9.65027 -2.760150 -2.728860 -4.5912600
## 2 -4.54673 -2.473430 4.058000 -7.58451 1.900550 -0.448330 3.0949600
## 3 2.86445 -2.070220 2.462650 2.51220 2.058280 5.339860 2.6515900
## 4 1.68421 -1.199610 -0.530965 -1.33119 -0.975906 3.893280 -0.0340843
## 5 3.82968 -1.900950 1.625210 -2.45836 5.390610 -3.771980 -2.1347900
## 6 -1.46312 0.554516 2.867900 -1.58279 0.983982 -0.891599 0.4064800
## PC495 PC496 PC497 PC498 PC499 PC500 PC501
## 1 2.351490 7.163820 -8.2409200 -3.9120700 -9.062020 3.441160 -3.08368
## 2 -5.062370 0.855031 -2.7127200 2.3142900 1.586490 -1.507090 5.28443
## 3 5.551460 -2.132360 -1.4278500 5.2343000 3.873320 5.284640 -8.04346
## 4 10.054500 -5.523180 5.5164200 1.8302400 3.816660 0.187067 -6.74254
## 5 -0.230529 -0.137757 -7.7412500 0.0808266 -5.174790 -5.542820 1.94645
## 6 0.258723 -0.700783 -0.0400363 -0.6213620 0.955429 2.011610 -2.18172
## PC502 PC503 PC504 PC505 PC506 PC507 PC508 PC509
## 1 -0.419969 -2.995380 5.961550 5.361510 -1.462250 1.691420 4.43112 1.01300
## 2 0.763600 3.303000 -2.168690 -1.453230 3.321350 -2.424390 -6.57481 -2.74568
## 3 -0.855195 0.200559 2.670440 0.968265 2.032350 3.530460 1.67844 -1.72968
## 4 2.425220 -1.481630 -2.864290 0.309452 -4.523930 -6.332410 -7.21716 -9.59828
## 5 6.997940 5.287440 -0.247115 -3.957960 -0.768855 -5.349030 -3.52164 2.70210
## 6 -1.222820 1.850020 3.368660 0.123468 -1.643740 -0.116247 1.02118 1.80872
## PC510 PC511 PC512 PC513 PC514 PC515 PC516
## 1 1.14270 -0.593595 4.568530 -0.783411 6.798120 3.465030 -1.386550
## 2 -5.43281 -6.073480 -9.238620 -3.930640 4.714750 3.786550 5.979870
## 3 1.92742 1.038900 -0.758543 3.194210 -1.424260 3.861540 -1.934080
## 4 -2.61054 3.113330 4.754330 10.433200 -0.938057 -0.495519 -3.047320
## 5 5.49661 0.799706 -3.859220 -1.651530 -2.488160 -1.019560 -0.313847
## 6 4.25996 0.588597 0.363282 0.333997 -1.110800 1.084420 -0.047089
## PC517 PC518 PC519 PC520 PC521 PC522 PC523
## 1 0.8358150 -0.876649 0.585292 -3.637650 1.655300 0.501426 3.003400
## 2 1.7188800 -1.044720 -4.890080 8.956480 -4.874770 6.440240 -7.659130
## 3 -1.9131200 -1.373370 6.798190 1.718600 7.593670 -1.421980 -0.778801
## 4 -0.8310570 1.023570 -1.006960 -3.737210 1.595190 -0.422374 -0.502342
## 5 0.7935220 -2.730800 0.691550 -4.445310 -1.428650 -1.608260 0.645048
## 6 -0.0785162 -1.464450 -1.750720 -0.275537 0.461668 0.281456 -0.719800
## PC524 PC525 PC526 PC527 PC528 PC529 PC530
## 1 0.142158 -0.266102 1.5461200 2.902880 -3.2089000 -0.848327 -3.410050
## 2 0.839364 5.432040 2.0222800 -3.053660 -3.1449600 -1.215720 6.297870
## 3 -0.569315 -3.984880 0.0628193 -4.170970 0.8654790 1.617450 -3.183280
## 4 -2.952250 -1.144010 -0.6536370 -3.288340 8.6818500 -6.509320 -3.589260
## 5 -1.499840 -0.112070 -2.4087800 1.257200 -4.0401000 0.260552 0.485898
## 6 -2.144600 0.108141 -0.6499950 -0.186286 0.0153899 0.111869 1.455910
## PC531 PC532 PC533 PC534 PC535 PC536 PC537
## 1 -3.781640 1.16091 -0.199297 2.688420 -3.921750 0.9269860 -6.306970
## 2 -1.578350 1.83128 -1.105080 -3.940920 2.397470 -2.7789500 6.376910
## 3 7.645230 -2.08849 5.024420 -3.078700 7.033480 -2.6912400 4.013870
## 4 0.709288 1.40429 0.118824 -0.606427 0.373311 2.8674300 1.767860
## 5 2.082530 -1.38143 2.558050 -0.135362 1.782990 -2.0086800 -2.512510
## 6 -0.668018 -1.36199 -1.563150 0.325076 -0.269977 0.0203826 -0.178135
## PC538 PC539 PC540 PC541 PC542 PC543 PC544
## 1 -8.0671200 -1.708610 1.948850 2.694340 -1.408230 2.19445 1.809490
## 2 0.9573450 5.234290 -6.633550 -4.090920 2.165270 1.91719 -5.977660
## 3 7.2265100 -0.134621 5.578220 -2.762610 4.422340 -2.61493 3.222350
## 4 2.9975300 -2.940770 -1.263460 3.066620 -4.970990 -7.31893 1.014820
## 5 -0.2653890 -2.483520 0.549056 -1.825210 0.372149 -1.27103 0.922858
## 6 0.0719487 1.748010 1.272710 -0.845579 0.985204 1.41856 -0.889595
## PC545 PC546 PC547 PC548 PC549 PC550 PC551 PC552
## 1 4.40683 -5.409050 5.96768 0.398777 -0.702313 -1.357150 -2.232910 2.326290
## 2 -1.43173 6.598210 -1.12831 -3.808900 0.802527 -3.136790 -1.408970 -1.345590
## 3 -3.86566 -0.439519 -4.08789 3.048990 -5.735550 -2.143700 0.969942 -3.183430
## 4 -4.05053 -0.149712 -2.68935 -4.168250 -2.612520 -0.710597 -0.938771 3.164760
## 5 -4.88518 3.106530 -4.32789 -1.546010 -2.443470 -1.167400 5.067440 -2.298670
## 6 2.84320 -0.582349 -2.30697 -1.589510 0.670184 -1.329660 0.221059 0.586166
## PC553 PC554 PC555 PC556 PC557 PC558 PC559
## 1 0.79319 0.0470201 1.981870 -1.703330 -2.061230 1.562570 0.825182
## 2 -3.32840 2.2720900 -0.506293 -0.436124 2.560870 -1.030220 -1.628560
## 3 -7.12748 -9.1333200 1.353370 11.436600 5.192520 -1.752540 8.428580
## 4 -4.10860 -1.9656600 0.557320 -1.547850 -0.705234 -0.601586 -4.642500
## 5 1.67823 -2.2403200 -0.481799 2.929270 2.811560 -2.738110 4.174570
## 6 -1.75585 -0.2725420 0.552749 0.570284 0.514863 -0.704511 -1.643480
## PC560 PC561 PC562 PC563 PC564 PC565 PC566
## 1 0.420481 -3.793130 -0.414296 -0.0600313 5.563500 2.361350 -0.262055
## 2 2.953380 4.280560 0.601785 -2.9315400 -2.021430 -7.079140 4.836760
## 3 -3.234820 1.132630 0.694219 3.9883600 -5.318680 4.967230 -6.210310
## 4 -0.712875 -0.518147 1.870550 -0.9059940 -6.691840 2.143030 5.938940
## 5 0.791369 0.524604 -0.367272 4.1172100 0.705802 -1.575030 -4.433060
## 6 0.792963 0.647974 0.339495 0.4210210 0.127645 0.483052 1.349320
## PC567 PC568 PC569 PC570 PC571 PC572 PC573
## 1 0.29151600 0.7719060 0.306834 -1.0000500 3.047820 3.18003 2.843420
## 2 0.59613900 -3.1383900 -4.055770 -0.0871318 -3.504170 -1.41691 -2.354610
## 3 2.98454000 0.0442454 4.062920 -0.0503942 -3.219580 -1.66863 2.486230
## 4 -0.00271435 -0.2810280 4.301680 -0.4060630 1.402390 -1.75967 -1.530670
## 5 0.74239100 -1.5467500 -3.770110 -2.8879800 -1.106600 1.11875 0.442358
## 6 -1.60319000 -1.8204800 0.887334 0.3000610 -0.726671 -1.73700 -0.634761
## PC574 PC575 PC576 PC577 PC578 PC579 PC580
## 1 -0.3358360 -1.237310 0.665701 -0.605718 0.0551401 -1.007290 1.793220
## 2 -2.9567100 1.861450 -3.139170 -3.050840 -0.8580520 3.980700 -3.359080
## 3 2.6076500 1.443250 0.584546 4.961950 1.2243800 -4.983430 -0.230114
## 4 5.7013300 -1.543000 0.630317 1.868920 1.1734300 -1.295220 -0.954495
## 5 -0.0129894 0.115781 0.323865 0.380932 -0.8205720 0.449513 0.579594
## 6 0.6042500 1.319940 -0.597215 1.652550 -0.4444370 -0.331132 -0.569123
## PC581 PC582 PC583 PC584 PC585 PC586 PC587
## 1 2.720270 -0.4616770 -2.3320300 -1.475010 -1.899580 -2.866020 -1.932290
## 2 -1.834220 -0.0690979 3.1247700 4.688780 -0.123124 -1.236990 -2.422160
## 3 3.156670 0.2686570 0.7222070 -6.301350 0.217357 1.439610 10.159800
## 4 0.814130 -3.2691900 -0.0419468 -1.381850 -0.676409 -0.266177 0.927464
## 5 -2.802740 -1.7489800 2.9339800 -0.163633 -1.765400 1.683580 1.384650
## 6 -0.466528 1.2506900 -0.4928560 -0.626011 -0.526173 0.292149 0.573847
## PC588 PC589 PC590 PC591 PC592 PC593 PC594
## 1 -0.141236 -0.664032 -0.8161300 -0.207564 1.4935100 0.316396 -0.667961
## 2 2.604660 2.575450 -1.5535600 -0.500613 0.0649269 -0.277733 -0.694854
## 3 -1.298170 -3.526050 4.8350800 1.476620 -4.5997500 3.266830 4.748050
## 4 -0.943995 -2.347760 3.7155600 1.677190 -2.1261300 -1.451200 -0.320646
## 5 -2.474000 -1.555780 0.0218849 0.402409 -0.2095010 0.888746 0.921578
## 6 -0.722939 -0.379269 0.2168900 2.065300 -0.7622940 -1.045700 2.085040
## PC595 PC596 PC597 PC598 PC599 PC600 PC601
## 1 -0.549683 -0.246980 -1.251890 0.414057 1.418840 -2.87165 -0.469797
## 2 0.775012 -0.540705 -1.738300 -0.582886 -0.645756 -1.21956 -0.324708
## 3 -1.797690 0.313111 3.205810 -2.811320 0.922040 1.41111 1.903940
## 4 -0.216478 1.009920 -0.662947 2.115990 0.858634 1.88245 -0.748201
## 5 -2.515350 0.499773 1.423690 1.116750 1.089660 -1.82501 2.321740
## 6 -0.131602 -0.199249 1.387420 -1.301500 -1.145690 -1.21221 -0.328161
## PC602 PC603 PC604 PC605 PC606 PC607 PC608
## 1 0.2907260 1.473110 -0.336378 0.100602 -0.6851090 -0.781235 -0.3115810
## 2 -0.4071800 -4.197880 -1.868530 1.069350 -2.8481800 0.505759 -0.0393097
## 3 -1.9962200 3.364780 1.648440 2.445660 2.8466400 2.121700 -0.0818179
## 4 0.0959698 -0.898860 1.091910 0.178327 0.0165746 -0.598874 0.4995670
## 5 -0.8577700 0.662205 1.237620 0.781759 0.4888930 1.386450 0.0132441
## 6 0.2848460 0.041936 -0.780476 0.672115 -0.7908480 -1.269890 1.0314300
## PC609 PC610 PC611 PC612 PC613 PC614 PC615
## 1 -0.221057 0.442800 0.521209 -2.357190 -0.850230 0.591296 -1.296590
## 2 0.200218 -2.238000 -0.872753 1.246050 0.438104 -0.094276 0.505613
## 3 -2.850980 0.723151 -0.587935 -0.245363 0.224606 -1.587410 -3.535470
## 4 1.108090 -0.402738 -1.670960 1.746220 4.937510 -1.617230 -1.059860
## 5 -0.206420 -0.885669 2.159730 -0.637576 0.935158 -0.578438 -1.365530
## 6 -0.565625 0.069130 0.532827 -0.571052 0.621579 0.243557 1.545660
## PC616 PC617 PC618 PC619 PC620 PC621 PC622
## 1 -1.632250 0.6660840 0.389295 0.276354 -0.774150 0.953437 0.0521014
## 2 0.138709 -0.5827020 -1.151380 -0.435772 -1.163980 -0.306299 0.3336960
## 3 2.235280 -1.4992200 0.896373 0.308423 0.440106 -1.725590 1.5645700
## 4 0.438829 -0.0687455 2.197370 -0.360688 -1.277530 0.288341 0.5624200
## 5 -0.236214 0.8144490 -0.483606 -0.289504 -0.841674 0.392056 -0.8320790
## 6 0.818821 -0.6228910 -0.699817 -1.189440 -0.185710 0.129525 0.1670970
## PC623 PC624 PC625 PC626 PC627 PC628 PC629
## 1 0.5175900 -1.81857000 -0.7316110 -0.823505 0.1560310 0.192283 0.418723
## 2 -0.4767880 1.58266000 -1.3272400 1.407600 -0.6784180 -0.135068 -0.591682
## 3 0.0563922 -0.17899200 0.4434480 0.630557 1.7496100 0.201506 -0.740755
## 4 -0.4156750 0.63909300 0.8555740 -1.431060 -0.5154220 -0.145055 0.434376
## 5 0.0536224 0.00415705 0.8372700 -0.243549 0.7443450 1.271420 -0.486573
## 6 0.3216150 -1.03834000 -0.0593882 -1.546240 0.0601514 -0.141470 1.874040
## PC630 PC631 PC632 PC633 PC634 PC635 PC636
## 1 -0.945399 -0.116795 0.3469450 -0.7861110 -0.533855 0.509936 -1.6642200
## 2 0.205106 0.423638 -1.1551400 -0.4015300 1.020260 -0.627157 1.8214800
## 3 0.670900 0.164132 -0.1241230 1.8740000 0.104715 0.359540 -0.8932950
## 4 1.444600 0.445946 -0.0578001 0.0259392 1.181570 -0.179463 -0.0379748
## 5 -1.395740 -0.927931 -0.6534370 0.9361040 0.608401 0.398321 1.0212500
## 6 0.359775 -1.074600 -0.2059990 0.2622780 -0.164927 -0.100484 -0.1302880
## PC637 PC638 PC639 PC640 PC641 PC642 PC643
## 1 0.162724 -0.4605770 0.4752010 0.6770570 -0.4625220 -0.834200 0.201035
## 2 -0.415161 0.4964910 -0.4146870 -0.0406003 0.6603250 0.593328 -0.184156
## 3 -0.326878 0.6808590 0.0577737 0.0221834 0.5410200 0.128853 -0.604277
## 4 -0.315693 -1.0257500 -0.5162990 0.6430490 2.2318600 1.095330 1.038110
## 5 -0.495483 -0.0768325 0.2054040 -0.0461064 -0.2768540 -0.169853 0.132705
## 6 1.498620 -2.0069700 2.5834200 -0.0282036 0.0243375 0.482614 0.949408
## PC644 PC645 PC646 PC647 PC648 PC649 PC650
## 1 -0.5574490 1.036380 -1.150420 -0.353489 0.4766350 -0.7206810 -0.5252320
## 2 0.5225750 0.361486 1.335850 0.248652 -0.0931085 0.8748700 -0.5641140
## 3 0.4590010 0.377079 -0.155535 -0.813350 -0.7363170 0.4244160 1.1097400
## 4 1.6384200 0.306756 1.243450 -0.767603 0.0657744 0.0890435 0.2621140
## 5 -0.0914471 0.523687 0.102309 0.285675 -0.6218090 -0.1905420 -1.0366500
## 6 -3.3399100 1.651420 -2.192890 2.853810 -0.7432670 -0.0134943 -0.0703362
## PC651 PC652 PC653 PC654 PC655 PC656 PC657
## 1 1.181390 0.411984 -0.491513 0.636168 -1.108530 -0.7914510 0.5128880
## 2 -0.306752 -1.094940 -0.974501 -0.224273 0.374006 -0.1289310 -0.4801830
## 3 0.200795 0.768692 0.203815 -0.514048 1.624800 0.0332113 0.2230850
## 4 -1.070100 0.238257 -0.242319 -0.823498 0.255163 0.2897510 0.0515537
## 5 0.855812 0.103177 -0.416720 -0.929673 0.352428 0.3306270 -1.0749700
## 6 -0.611530 2.776650 -0.394573 4.397120 0.926366 -1.7102700 2.9783800
## PC658 PC659 PC660 PC661 PC662 PC663 PC664
## 1 -0.2485930 0.479566 0.6480230 -0.6219510 0.2869750 -0.3355290 0.1855000
## 2 -0.8273070 -0.623966 -0.5404300 -0.6169230 0.3506670 -0.0202628 -1.6292600
## 3 0.1220640 -0.021907 0.8216040 0.1395790 -0.3960220 0.1827820 0.4200820
## 4 0.0235954 -0.322674 -0.0527559 0.1230140 -0.2317690 -0.3287740 0.6901430
## 5 0.5372470 -0.578310 -0.0338003 0.0413231 0.0962499 0.2256080 0.0386441
## 6 -1.1051900 -1.850610 2.0511700 5.6060400 -7.9374700 -2.0061800 3.2096900
## PC665 PC666 PC667 PC668 PC669 PC670 PC671
## 1 -0.754257 0.554987 0.158071 0.7458300 -0.379930 0.1047520 -0.179097
## 2 0.228498 -0.399271 -0.591397 0.4634680 1.476680 -0.0455636 -0.303098
## 3 0.531042 -0.305498 -0.350900 -0.0910953 0.638506 0.0446885 0.178340
## 4 -0.612182 0.126106 0.187181 0.0336248 -0.660484 -0.4541910 -0.345722
## 5 0.163223 0.390714 0.108911 0.3355710 0.333600 -0.0537582 0.395603
## 6 -10.648800 -6.623200 9.017770 -3.8143900 -23.000500 2.8040000 5.838030
## PC672 PC673 PC674 PC675 PC676 PC677 PC678
## 1 -0.3342810 0.3981560 -0.435260 0.175920 -0.0468280 0.1285820 0.0498232
## 2 0.3728800 -0.4325030 0.390417 0.350570 -0.0780383 0.3937650 0.6010360
## 3 -0.0501735 -0.9105390 -0.330144 0.204279 -0.7197590 -0.9475590 0.3314220
## 4 -0.4610860 0.0223074 -0.690654 -0.574589 -0.4404890 0.0831125 -0.2581000
## 5 0.3669710 -0.6234950 0.871031 0.295893 0.6582310 0.6858390 -0.0185350
## 6 -3.1218000 -0.7085280 -2.893680 0.841936 2.9148100 5.8779700 -2.9231400
## PC679 PC680 PC681 PC682 PC683 PC684 PC685
## 1 -0.209140 -0.0670167 0.0682520 0.1681010 0.4406970 0.0738443 -0.2731980
## 2 -0.187945 0.0337131 -0.1581240 -0.1310550 0.0736109 -0.0511851 -0.0689223
## 3 0.174690 0.3413370 -0.4316240 -0.3709540 -0.0836069 -0.1589380 0.2315860
## 4 -0.053170 0.1103960 -0.0865292 -0.0130275 -0.1663030 0.0509096 0.0573638
## 5 0.471279 -0.4053920 -0.3836460 0.2063080 -0.2012220 -0.1072520 -0.1028470
## 6 -0.844235 0.2050230 0.1600970 0.8233040 -0.5068480 -0.5304620 0.6896650
## PC686 PC687 PC688 Individual Pop_City Country Latitude
## 1 0.1659210 -0.2032410 -5.23915e-07 801 Durres Albania 41.29704
## 2 -0.2389500 0.0493433 -5.23915e-07 802 Durres Albania 41.29704
## 3 0.2278310 0.3125050 -5.23915e-07 803 Durres Albania 41.29704
## 4 -0.2272640 -0.0666839 -5.23915e-07 804 Durres Albania 41.29704
## 5 0.0771716 -0.0817589 -5.23915e-07 805 Durres Albania 41.29704
## 6 0.4703270 -0.0361765 -5.23915e-07 806 Durres Albania 41.29704
## Longitude Continent Year Region Subregion order order2 orderold
## 1 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 2 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 3 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 4 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 5 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
## 6 19.50373 Europe 2018 Southern Europe East Europe 33 25 25
#save the pca plot
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc2_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 and PC3
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "PCA_lea_euro_global_pc1_pc3_b.pdf"
),
width = 8,
height = 6,
units = "in"
)
Colored by region
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "PCA_lea_euro_global_region_pc1_pc2_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 and PC3
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1","lea_cross_entropy_euro_global_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max 0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max 0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max 0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
## K = 22 K = 23 K = 24 K = 25
## min 0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max 0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 20
ce = cross.entropy(project, K = 20)
ce #run 1 is best for k=20
## K = 20
## run 1 0.8675384
## run 2 0.8685108
## run 3 0.8681354
## run 4 0.8683496
## run 5 0.8683367
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max 0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max 0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max 0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
## K = 22 K = 23 K = 24 K = 25
## min 0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max 0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 12
ce = cross.entropy(project, K = 12)
ce #run 5 is best for k=12
## K = 12
## run 1 0.8711113
## run 2 0.8716798
## run 3 0.8718053
## run 4 0.8724610
## run 5 0.8709529
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max 0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max 0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max 0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
## K = 22 K = 23 K = 24 K = 25
## min 0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max 0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 15
ce = cross.entropy(project, K = 15)
ce #run 5 is best for k=15
## K = 15
## run 1 0.8695671
## run 2 0.8698192
## run 3 0.8701692
## run 4 0.8698683
## run 5 0.8691706
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max 0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max 0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max 0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
## K = 22 K = 23 K = 24 K = 25
## min 0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max 0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 6
ce = cross.entropy(project, K = 6)
ce #run 5 is best for k=6
## K = 6
## run 1 0.8811239
## run 2 0.8814691
## run 3 0.8818163
## run 4 0.8813863
## run 5 0.8810623
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9295938 0.9035762 0.8929160 0.8866176 0.8831455 0.8810623 0.8793035
## mean 0.9299463 0.9038992 0.8932721 0.8869377 0.8840167 0.8813716 0.8797496
## max 0.9303090 0.9042813 0.8935996 0.8872815 0.8859704 0.8818163 0.8802178
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8777762 0.8754397 0.8737881 0.8724198 0.8709529 0.8703098 0.8702140
## mean 0.8779980 0.8760114 0.8743412 0.8732523 0.8716021 0.8712855 0.8703966
## max 0.8784509 0.8765061 0.8748110 0.8741085 0.8724610 0.8718638 0.8706670
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8691706 0.8691407 0.8684490 0.8678589 0.8677646 0.8675384 0.8673978
## mean 0.8697189 0.8695057 0.8689370 0.8686954 0.8680425 0.8681742 0.8678156
## max 0.8701692 0.8700150 0.8695248 0.8697050 0.8682069 0.8685108 0.8686613
## K = 22 K = 23 K = 24 K = 25
## min 0.8669903 0.8671792 0.8666401 0.8673425
## mean 0.8679212 0.8675363 0.8676546 0.8677506
## max 0.8688708 0.8679872 0.8685890 0.8681663
# get the cross-entropy of all runs for K = 22
ce = cross.entropy(project, K = 22)
ce #run 5 is best for k=22
## K = 22
## run 1 0.8688708
## run 2 0.8675688
## run 3 0.8681413
## run 4 0.8680349
## run 5 0.8669903
color_palette_20 <-
c(
"#F49AC2",
"navy",
"yellow2",
"purple",
"#B20CC9",
"magenta",
"#008080",
"green",
"#77DD77",
"#AE9393",
"#B22222",
"#AE8333",
"#FF8C1A",
"#1E90FF",
"orangered",
"green4",
"purple4",
"#75FAFF",
"blue",
"#FFFF99"
)
using ggplot
best = which.min(cross.entropy(project, K = 20))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 20, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_20)
Extract ancestry coefficients for k=20 change to correct matrix
leak20 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K20/run1/r2_0.01_b_r1.20.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak20)
## # A tibble: 6 × 20
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0264 0.0207 0.0131 1.31e-2 1.90e-2 3.48e-2 2.08e-3 4.19e-3 1.34e-1
## 2 0.0000999 0.0657 0.0185 9.99e-5 9.99e-5 5.16e-2 9.99e-5 6.28e-2 1.70e-1
## 3 0.0000999 0.0000999 0.0000999 9.99e-5 9.99e-5 2.84e-3 9.99e-5 9.99e-5 9.99e-5
## 4 0.0000999 0.0323 0.0000999 8.44e-3 9.99e-5 1.29e-2 1.39e-2 5.77e-3 1.90e-2
## 5 0.0000999 0.00361 0.0146 9.99e-5 1.80e-3 4.09e-3 2.93e-2 9.99e-5 9.99e-5
## 6 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 11 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 2.64096e-02 2.06646e-02 1.31437e-02 1.30614e-02 1.90113e-02
## 2 1002 OKI 9.99460e-05 6.56597e-02 1.84685e-02 9.99460e-05 9.99460e-05
## 3 1003 OKI 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 4 1004 OKI 9.99190e-05 3.23457e-02 9.99190e-05 8.43696e-03 9.99190e-05
## 5 1005 OKI 9.99190e-05 3.61125e-03 1.46469e-02 9.99190e-05 1.79802e-03
## 6 1006 OKI 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
## X6 X7 X8 X9 X10 X11
## 1 3.48243e-02 2.08108e-03 4.18512e-03 1.34475e-01 9.99820e-05 2.13904e-02
## 2 5.15924e-02 9.99460e-05 6.28295e-02 1.70231e-01 1.02422e-01 2.38672e-02
## 3 2.84154e-03 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 7.08506e-03
## 4 1.28981e-02 1.38764e-02 5.76749e-03 1.89892e-02 9.99190e-05 9.99190e-05
## 5 4.08688e-03 2.93200e-02 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
## X12 X13 X14 X15 X16 X17
## 1 9.99820e-05 5.83628e-02 2.90473e-03 1.98357e-02 6.83355e-03 0.459098
## 2 9.99460e-05 7.69412e-03 2.59487e-02 4.60633e-02 9.99460e-05 0.335353
## 3 9.98740e-05 9.98740e-05 2.42597e-04 9.98740e-05 9.98740e-05 0.981889
## 4 2.32811e-02 1.26668e-02 9.99190e-05 3.02842e-02 9.99190e-05 0.834116
## 5 2.96501e-03 9.99190e-05 9.99190e-05 2.97077e-02 9.99190e-05 0.872147
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 0.998103
## X18 X19 X20
## 1 8.79653e-02 5.75547e-02 1.79983e-02
## 2 2.66581e-02 6.06222e-02 1.99036e-03
## 3 5.65165e-03 9.98740e-05 8.91697e-04
## 4 9.99190e-05 6.43857e-03 9.99190e-05
## 5 1.46587e-02 1.05423e-02 1.56174e-02
## 6 9.98288e-05 9.98288e-05 9.98288e-05
Rename the columns
# Rename the columns starting from the third one
leak20 <- leak20 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak20)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 2.64096e-02 2.06646e-02 1.31437e-02 1.30614e-02 1.90113e-02
## 2 1002 OKI 9.99460e-05 6.56597e-02 1.84685e-02 9.99460e-05 9.99460e-05
## 3 1003 OKI 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05
## 4 1004 OKI 9.99190e-05 3.23457e-02 9.99190e-05 8.43696e-03 9.99190e-05
## 5 1005 OKI 9.99190e-05 3.61125e-03 1.46469e-02 9.99190e-05 1.79802e-03
## 6 1006 OKI 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
## v6 v7 v8 v9 v10 v11
## 1 3.48243e-02 2.08108e-03 4.18512e-03 1.34475e-01 9.99820e-05 2.13904e-02
## 2 5.15924e-02 9.99460e-05 6.28295e-02 1.70231e-01 1.02422e-01 2.38672e-02
## 3 2.84154e-03 9.98740e-05 9.98740e-05 9.98740e-05 9.98740e-05 7.08506e-03
## 4 1.28981e-02 1.38764e-02 5.76749e-03 1.89892e-02 9.99190e-05 9.99190e-05
## 5 4.08688e-03 2.93200e-02 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05
## v12 v13 v14 v15 v16 v17
## 1 9.99820e-05 5.83628e-02 2.90473e-03 1.98357e-02 6.83355e-03 0.459098
## 2 9.99460e-05 7.69412e-03 2.59487e-02 4.60633e-02 9.99460e-05 0.335353
## 3 9.98740e-05 9.98740e-05 2.42597e-04 9.98740e-05 9.98740e-05 0.981889
## 4 2.32811e-02 1.26668e-02 9.99190e-05 3.02842e-02 9.99190e-05 0.834116
## 5 2.96501e-03 9.99190e-05 9.99190e-05 2.97077e-02 9.99190e-05 0.872147
## 6 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 9.98288e-05 0.998103
## v18 v19 v20
## 1 8.79653e-02 5.75547e-02 1.79983e-02
## 2 2.66581e-02 6.06222e-02 1.99036e-03
## 3 5.65165e-03 9.98740e-05 8.91697e-04
## 4 9.99190e-05 6.43857e-03 9.99190e-05
## 5 1.46587e-02 1.05423e-02 1.56174e-02
## 6 9.98288e-05 9.98288e-05 9.98288e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
color_palette_20 <-
c(
"#008080",
"magenta",
"#FF8C1A",
"green4",
"#B22222",
"#F49AC2",
"green",
"blue",
"#AE9393",
"yellow2",
"purple4",
"#75FAFF",
"#77DD77",
"#1E90FF",
"#B20CC9",
"#FFFF99",
"goldenrod",
"navy",
"orangered",
"purple"
)
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak20 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:20)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_20[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=20.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_20) +
expand_limits(y = c(0, 1.5))
color_palette_12 <-
c(
"#FF8C1A",
"yellow2",
"#1E90FF",
"#B22222",
"#F49AC2",
"blue",
"#75FAFF",
"magenta",
"green4",
"green",
"purple",
"orangered"
)
Mean admixture by country using ggplot
best = which.min(cross.entropy(project, K = 12)) #5
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 12, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:12)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_12[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_12)
Extract ancestry coefficients for k=12
change to correct matrix
leak12 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K12/run5/r2_0.01_b_r5.12.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak12)
## # A tibble: 6 × 12
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4.23e-2 9.77e-2 1.00e-4 4.21e-2 0.590 6.21e-2 2.51e-2 2.33e-2 2.21e-2 1.06e-2
## 2 3.17e-2 9.75e-2 1.31e-2 3.03e-2 0.479 8.86e-2 9.17e-2 1.00e-4 2.79e-2 5.20e-2
## 3 5.68e-3 1.17e-3 9.99e-5 9.99e-5 0.986 9.99e-5 9.99e-5 9.99e-5 5.94e-3 9.99e-5
## 4 1.80e-3 7.41e-3 3.62e-2 1.00e-4 0.827 4.48e-2 3.40e-2 1.00e-4 1.00e-4 4.86e-2
## 5 9.99e-5 9.99e-5 2.49e-2 1.37e-2 0.878 2.00e-2 2.47e-2 9.99e-5 9.99e-5 3.83e-2
## 6 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 2 more variables: X11 <dbl>, X12 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 4.23137e-02 9.77107e-02 9.99910e-05 4.20743e-02 0.589946 6.21370e-02
## 2 1002 OKI 3.16848e-02 9.74647e-02 1.30881e-02 3.03256e-02 0.478817 8.86259e-02
## 3 1003 OKI 5.67710e-03 1.16962e-03 9.99280e-05 9.99280e-05 0.986417 9.99280e-05
## 4 1004 OKI 1.79768e-03 7.41490e-03 3.62091e-02 9.99550e-05 0.826658 4.47996e-02
## 5 1005 OKI 9.99460e-05 9.99460e-05 2.48609e-02 1.37150e-02 0.877755 2.00195e-02
## 6 1006 OKI 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 0.998901 9.99009e-05
## X7 X8 X9 X10 X11 X12
## 1 2.50839e-02 2.32812e-02 2.20865e-02 1.05599e-02 2.55935e-02 5.91129e-02
## 2 9.16646e-02 9.99910e-05 2.78675e-02 5.20009e-02 3.17949e-02 5.65665e-02
## 3 9.99280e-05 9.99280e-05 5.93690e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 4 3.39939e-02 9.99550e-05 9.99550e-05 4.86267e-02 9.99550e-05 9.99550e-05
## 5 2.47145e-02 9.99460e-05 9.99460e-05 3.83352e-02 9.99460e-05 9.99460e-05
## 6 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05
Rename the columns
# Rename the columns starting from the third one
leak12 <- leak12 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak12)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 4.23137e-02 9.77107e-02 9.99910e-05 4.20743e-02 0.589946 6.21370e-02
## 2 1002 OKI 3.16848e-02 9.74647e-02 1.30881e-02 3.03256e-02 0.478817 8.86259e-02
## 3 1003 OKI 5.67710e-03 1.16962e-03 9.99280e-05 9.99280e-05 0.986417 9.99280e-05
## 4 1004 OKI 1.79768e-03 7.41490e-03 3.62091e-02 9.99550e-05 0.826658 4.47996e-02
## 5 1005 OKI 9.99460e-05 9.99460e-05 2.48609e-02 1.37150e-02 0.877755 2.00195e-02
## 6 1006 OKI 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 0.998901 9.99009e-05
## v7 v8 v9 v10 v11 v12
## 1 2.50839e-02 2.32812e-02 2.20865e-02 1.05599e-02 2.55935e-02 5.91129e-02
## 2 9.16646e-02 9.99910e-05 2.78675e-02 5.20009e-02 3.17949e-02 5.65665e-02
## 3 9.99280e-05 9.99280e-05 5.93690e-03 9.99280e-05 9.99280e-05 9.99280e-05
## 4 3.39939e-02 9.99550e-05 9.99550e-05 4.86267e-02 9.99550e-05 9.99550e-05
## 5 2.47145e-02 9.99460e-05 9.99460e-05 3.83352e-02 9.99460e-05 9.99460e-05
## 6 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05 9.99009e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak12 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:12)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_12[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=12.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_12) +
expand_limits(y = c(0, 1.5))
color_palette_15 <-
c(
"#FF8C1A",
"yellow2",
"#1E90FF",
"#B22222",
"#AE8333",
"#B20CC9",
"#F49AC2",
"blue",
"purple4",
"#75FAFF",
"magenta",
"green4",
"green",
"purple",
"orangered"
)
using ggplot
best = which.min(cross.entropy(project, K = 15)) #5
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 15, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:15)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_15[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=15.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_15)
Extract ancestry coefficients for k=15
change to correct matrix
leak15 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K15/run5/r2_0.01_b_r5.15.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak15)
## # A tibble: 6 × 15
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.11e-1 2.61e-2 9.82e-2 3.86e-2 8.51e-3 1.07e-2 0.557 3.97e-2 9.31e-3 1.00e-4
## 2 1.06e-1 1.35e-2 2.39e-2 4.93e-2 1.00e-4 5.54e-2 0.466 1.62e-2 9.45e-2 4.65e-2
## 3 9.99e-5 9.99e-5 9.99e-5 9.99e-5 6.81e-3 9.99e-5 0.969 9.99e-5 1.16e-2 9.99e-5
## 4 1.72e-2 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.842 9.99e-5 9.99e-5 2.37e-2
## 5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 1.04e-2 0.888 2.61e-2 9.99e-5 7.55e-3
## 6 9.99e-5 9.99e-5 9.99e-5 9.99e-5 4.27e-3 9.99e-5 0.994 9.99e-5 9.99e-5 9.99e-5
## # ℹ 5 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 0.111112000 0.026075100 0.098216100 0.038634700 0.008512500
## 2 1002 OKI 0.105582000 0.013530700 0.023863800 0.049315600 0.000099982
## 3 1003 OKI 0.000099910 0.000099910 0.000099910 0.000099910 0.006810880
## 4 1004 OKI 0.017170000 0.000099919 0.000099919 0.000099919 0.000099919
## 5 1005 OKI 0.000099928 0.000099928 0.000099928 0.000099928 0.000099928
## 6 1006 OKI 0.000099883 0.000099883 0.000099883 0.000099883 0.004273470
## X6 X7 X8 X9 X10 X11
## 1 0.010740000 0.556509 0.039697900 0.009314820 0.000099982 0.001370110
## 2 0.055394000 0.465974 0.016217400 0.094522000 0.046479000 0.006631220
## 3 0.000099910 0.969329 0.000099910 0.011603400 0.000099910 0.000099910
## 4 0.000099919 0.842212 0.000099919 0.000099919 0.023659600 0.030946600
## 5 0.010440600 0.887931 0.026139900 0.000099928 0.007549700 0.010461000
## 6 0.000099883 0.994428 0.000099883 0.000099883 0.000099883 0.000099883
## X12 X13 X14 X15
## 1 0.042326300 0.034872400 0.000099982 0.022419200
## 2 0.012075400 0.016319500 0.000099982 0.093895300
## 3 0.000909957 0.010347400 0.000099910 0.000099910
## 4 0.000099919 0.000099919 0.077234200 0.007877870
## 5 0.000099928 0.006599350 0.050079000 0.000099928
## 6 0.000099883 0.000099883 0.000099883 0.000099883
Rename the columns
# Rename the columns starting from the third one
leak15 <- leak15 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak15)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 0.111112000 0.026075100 0.098216100 0.038634700 0.008512500
## 2 1002 OKI 0.105582000 0.013530700 0.023863800 0.049315600 0.000099982
## 3 1003 OKI 0.000099910 0.000099910 0.000099910 0.000099910 0.006810880
## 4 1004 OKI 0.017170000 0.000099919 0.000099919 0.000099919 0.000099919
## 5 1005 OKI 0.000099928 0.000099928 0.000099928 0.000099928 0.000099928
## 6 1006 OKI 0.000099883 0.000099883 0.000099883 0.000099883 0.004273470
## v6 v7 v8 v9 v10 v11
## 1 0.010740000 0.556509 0.039697900 0.009314820 0.000099982 0.001370110
## 2 0.055394000 0.465974 0.016217400 0.094522000 0.046479000 0.006631220
## 3 0.000099910 0.969329 0.000099910 0.011603400 0.000099910 0.000099910
## 4 0.000099919 0.842212 0.000099919 0.000099919 0.023659600 0.030946600
## 5 0.010440600 0.887931 0.026139900 0.000099928 0.007549700 0.010461000
## 6 0.000099883 0.994428 0.000099883 0.000099883 0.000099883 0.000099883
## v12 v13 v14 v15
## 1 0.042326300 0.034872400 0.000099982 0.022419200
## 2 0.012075400 0.016319500 0.000099982 0.093895300
## 3 0.000909957 0.010347400 0.000099910 0.000099910
## 4 0.000099919 0.000099919 0.077234200 0.007877870
## 5 0.000099928 0.006599350 0.050079000 0.000099928
## 6 0.000099883 0.000099883 0.000099883 0.000099883
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak15 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:15)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_15[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=15.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_15) +
expand_limits(y = c(0, 1.5))
using ggplot
best = which.min(cross.entropy(project, K = 6)) #5
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 6, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:6)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_6[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=6.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_6)
Extract ancestry coefficients for k=6
change to correct matrix
leak6 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K6/run5/r2_0.01_b_r5.6.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak6)
## # A tibble: 6 × 6
## X1 X2 X3 X4 X5 X6
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.174 0.172 0.150 0.350 0.131 0.0241
## 2 0.205 0.197 0.170 0.293 0.121 0.0131
## 3 0.160 0.156 0.123 0.400 0.117 0.0444
## 4 0.199 0.204 0.119 0.389 0.0885 0.000100
## 5 0.180 0.218 0.113 0.404 0.0847 0.000100
## 6 0.203 0.187 0.0976 0.404 0.108 0.000100
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.173708 0.171848 0.1501010 0.349694 0.1305770 0.024071800
## 2 1002 OKI 0.205148 0.197109 0.1701940 0.292993 0.1214940 0.013061600
## 3 1003 OKI 0.159720 0.156319 0.1226130 0.399926 0.1169840 0.044438900
## 4 1004 OKI 0.198631 0.204137 0.1192340 0.389391 0.0885066 0.000099991
## 5 1005 OKI 0.179729 0.218195 0.1134380 0.403849 0.0846894 0.000099991
## 6 1006 OKI 0.202770 0.187071 0.0975617 0.404205 0.1082920 0.000099991
Rename the columns
# Rename the columns starting from the third one
leak6 <- leak6 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak6)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.173708 0.171848 0.1501010 0.349694 0.1305770 0.024071800
## 2 1002 OKI 0.205148 0.197109 0.1701940 0.292993 0.1214940 0.013061600
## 3 1003 OKI 0.159720 0.156319 0.1226130 0.399926 0.1169840 0.044438900
## 4 1004 OKI 0.198631 0.204137 0.1192340 0.389391 0.0885066 0.000099991
## 5 1005 OKI 0.179729 0.218195 0.1134380 0.403849 0.0846894 0.000099991
## 6 1006 OKI 0.202770 0.187071 0.0975617 0.404205 0.1082920 0.000099991
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak6 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:6)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_6[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=6.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_6) +
expand_limits(y = c(0, 1.5))
color_palette_22 <-
c(
"#FF8C1A",
"yellow2",
"#77DD77",
"chocolate4",
"#B22222",
"purple",
"#B20CC9",
"#F49AC2",
"blue",
"#1E90FF",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"green",
"#008080",
"goldenrod3",
"coral",
"orangered2"
)
using ggplot
best = which.min(cross.entropy(project, K = 22)) #5
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 22, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:22)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_22[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_22)
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "LEA_admixture_by_country_euro_global_k22_r01_MAF1.pdf"
),
width = 10,
height = 7,
units = "in"
)
Using ggplot2 for individual admixtures
Extract ancestry coefficients for k=22
change to correct matrix
leak22 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K22/run5/r2_0.01_b_r5.22.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak22)
## # A tibble: 6 × 22
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.13e-1 9.99e-5 9.99e-5 9.99e-5 0.494 5.05e-2 9.99e-5 1.79e-3 3.43e-2 5.14e-2
## 2 4.86e-2 1.00e-4 1.03e-2 2.78e-3 0.319 1.03e-3 3.56e-2 8.74e-3 6.26e-2 6.63e-2
## 3 9.98e-5 9.98e-5 9.98e-5 9.98e-5 0.980 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 9.99e-5 9.99e-5 5.88e-3 3.40e-2 0.817 9.99e-5 1.80e-2 9.99e-5 5.80e-2 2.02e-2
## 5 9.99e-5 9.99e-5 9.99e-5 3.84e-3 0.844 5.71e-3 1.21e-2 9.99e-5 6.06e-2 6.65e-3
## 6 9.98e-5 9.98e-5 9.98e-5 9.98e-5 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 12 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 1.12653e-01 9.99280e-05 9.99280e-05 9.99280e-05 0.493779 5.05374e-02
## 2 1002 OKI 4.86145e-02 9.99550e-05 1.02890e-02 2.77542e-03 0.319323 1.02586e-03
## 3 1003 OKI 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 0.979697 9.98380e-05
## 4 1004 OKI 9.98920e-05 9.98920e-05 5.88100e-03 3.40031e-02 0.817342 9.98920e-05
## 5 1005 OKI 9.98830e-05 9.98830e-05 9.98830e-05 3.84252e-03 0.844145 5.71364e-03
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 0.997904 9.98108e-05
## X7 X8 X9 X10 X11 X12
## 1 9.99280e-05 1.79134e-03 3.42946e-02 5.13805e-02 6.00100e-02 2.66775e-02
## 2 3.56242e-02 8.73696e-03 6.26307e-02 6.63284e-02 1.72237e-02 3.31449e-02
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.80011e-02 9.98920e-05 5.80196e-02 2.01962e-02 2.09973e-02 9.98920e-05
## 5 1.21276e-02 9.98830e-05 6.05534e-02 6.65074e-03 2.46074e-02 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## X13 X14 X15 X16 X17 X18
## 1 9.99280e-05 9.99280e-05 1.24226e-02 5.42137e-02 2.79981e-02 9.00488e-03
## 2 9.99550e-05 9.26243e-02 2.57066e-02 1.43312e-01 7.71757e-02 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 4.15423e-03 9.98380e-05 4.38346e-03
## 4 9.98920e-05 1.39880e-02 9.98920e-05 8.58207e-03 1.79089e-03 9.98920e-05
## 5 9.98830e-05 9.98830e-05 9.98830e-05 2.81391e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## X19 X20 X21 X22
## 1 3.64674e-02 2.79699e-02 9.99280e-05 9.99280e-05
## 2 3.60504e-02 1.89146e-02 9.99550e-05 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.96870e-03 9.98380e-05
## 4 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 5 9.98830e-05 1.29216e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
Rename the columns
# Rename the columns starting from the third one
leak22 <- leak22 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak22)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 1.12653e-01 9.99280e-05 9.99280e-05 9.99280e-05 0.493779 5.05374e-02
## 2 1002 OKI 4.86145e-02 9.99550e-05 1.02890e-02 2.77542e-03 0.319323 1.02586e-03
## 3 1003 OKI 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 0.979697 9.98380e-05
## 4 1004 OKI 9.98920e-05 9.98920e-05 5.88100e-03 3.40031e-02 0.817342 9.98920e-05
## 5 1005 OKI 9.98830e-05 9.98830e-05 9.98830e-05 3.84252e-03 0.844145 5.71364e-03
## 6 1006 OKI 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 0.997904 9.98108e-05
## v7 v8 v9 v10 v11 v12
## 1 9.99280e-05 1.79134e-03 3.42946e-02 5.13805e-02 6.00100e-02 2.66775e-02
## 2 3.56242e-02 8.73696e-03 6.26307e-02 6.63284e-02 1.72237e-02 3.31449e-02
## 3 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05 9.98380e-05
## 4 1.80011e-02 9.98920e-05 5.80196e-02 2.01962e-02 2.09973e-02 9.98920e-05
## 5 1.21276e-02 9.98830e-05 6.05534e-02 6.65074e-03 2.46074e-02 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## v13 v14 v15 v16 v17 v18
## 1 9.99280e-05 9.99280e-05 1.24226e-02 5.42137e-02 2.79981e-02 9.00488e-03
## 2 9.99550e-05 9.26243e-02 2.57066e-02 1.43312e-01 7.71757e-02 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.98380e-05 4.15423e-03 9.98380e-05 4.38346e-03
## 4 9.98920e-05 1.39880e-02 9.98920e-05 8.58207e-03 1.79089e-03 9.98920e-05
## 5 9.98830e-05 9.98830e-05 9.98830e-05 2.81391e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
## v19 v20 v21 v22
## 1 3.64674e-02 2.79699e-02 9.99280e-05 9.99280e-05
## 2 3.60504e-02 1.89146e-02 9.99550e-05 9.99550e-05
## 3 9.98380e-05 9.98380e-05 9.96870e-03 9.98380e-05
## 4 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 5 9.98830e-05 1.29216e-02 9.98830e-05 9.98830e-05
## 6 9.98108e-05 9.98108e-05 9.98108e-05 9.98108e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak22 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:22)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_22[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=22.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_22) +
expand_limits(y = c(0, 1.5))
## K = 23
## run 1 0.8674120
## run 2 0.8675108
## run 3 0.8679872
## run 4 0.8671792
## run 5 0.8675925
color_palette_23 <-
c(
"purple4",
"purple",
"orangered",
"#FF8C1A",
"#F49AC2",
"magenta",
"#AE9393",
"#FFFF99",
"orchid1",
"#1E90FF",
"chocolate4",
"#B20CC9",
"goldenrod",
"#77DD77",
"blue",
"navy",
"green",
"green4",
"#B22222",
"#008080",
"coral",
"yellow2",
"#75FAFF"
)
using ggplot
best = which.min(cross.entropy(project, K = 23))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 23, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:23)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_23[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_23)
Extract ancestry coefficients for k=23
change to correct matrix
leak23 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K23/run4/r2_0.01_b_r4.23.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak23)
## # A tibble: 6 × 23
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0000999 0.172 0.0000999 3.30e-2 2.70e-2 9.99e-5 9.99e-5 4.42e-2 2.55e-2
## 2 0.0000999 0.175 0.0000999 1.81e-2 5.98e-2 3.26e-2 9.99e-5 9.96e-2 2.33e-2
## 3 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 5.34e-3
## 4 0.0000998 0.998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 5 0.0000998 0.998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 6 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 14 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>,
## # X21 <dbl>, X22 <dbl>, X23 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 9.99190e-05 1.72014e-01 9.99190e-05 3.29890e-02 2.69674e-02
## 2 1002 OKI 9.99460e-05 1.75082e-01 9.99460e-05 1.81437e-02 5.98223e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.98021e-05 9.97804e-01 9.98021e-05 9.98021e-05 9.98021e-05
## 5 1005 OKI 9.98018e-05 9.97804e-01 9.98018e-05 9.98018e-05 9.98018e-05
## 6 1006 OKI 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
## X6 X7 X8 X9 X10 X11
## 1 9.99190e-05 9.99190e-05 4.42102e-02 2.55167e-02 9.99190e-05 9.99190e-05
## 2 3.26238e-02 9.99460e-05 9.96464e-02 2.32976e-02 1.37268e-02 2.23341e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 5.34090e-03 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
## X12 X13 X14 X15 X16 X17
## 1 3.14792e-02 6.89065e-02 3.26504e-02 9.99190e-05 9.99190e-05 1.17634e-01
## 2 2.32446e-03 7.27989e-03 7.33037e-02 2.86953e-02 9.99460e-05 1.57901e-01
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 1.08933e-02
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
## X18 X19 X20 X21 X22 X23
## 1 2.25024e-02 9.99190e-05 1.01914e-01 2.37851e-01 5.57836e-02 2.86824e-02
## 2 3.26230e-02 9.99460e-05 7.49364e-02 1.37089e-01 4.05708e-02 9.99460e-05
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.81769e-01 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.97804e-01 9.98016e-05 9.98016e-05
Rename the columns
# Rename the columns starting from the third one
leak23 <- leak23 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak23)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 9.99190e-05 1.72014e-01 9.99190e-05 3.29890e-02 2.69674e-02
## 2 1002 OKI 9.99460e-05 1.75082e-01 9.99460e-05 1.81437e-02 5.98223e-02
## 3 1003 OKI 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05
## 4 1004 OKI 9.98021e-05 9.97804e-01 9.98021e-05 9.98021e-05 9.98021e-05
## 5 1005 OKI 9.98018e-05 9.97804e-01 9.98018e-05 9.98018e-05 9.98018e-05
## 6 1006 OKI 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
## v6 v7 v8 v9 v10 v11
## 1 9.99190e-05 9.99190e-05 4.42102e-02 2.55167e-02 9.99190e-05 9.99190e-05
## 2 3.26238e-02 9.99460e-05 9.96464e-02 2.32976e-02 1.37268e-02 2.23341e-02
## 3 9.98201e-05 9.98201e-05 9.98201e-05 5.34090e-03 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
## v12 v13 v14 v15 v16 v17
## 1 3.14792e-02 6.89065e-02 3.26504e-02 9.99190e-05 9.99190e-05 1.17634e-01
## 2 2.32446e-03 7.27989e-03 7.33037e-02 2.86953e-02 9.99460e-05 1.57901e-01
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 9.98201e-05 1.08933e-02
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05 9.98016e-05
## v18 v19 v20 v21 v22 v23
## 1 2.25024e-02 9.99190e-05 1.01914e-01 2.37851e-01 5.57836e-02 2.86824e-02
## 2 3.26230e-02 9.99460e-05 7.49364e-02 1.37089e-01 4.05708e-02 9.99460e-05
## 3 9.98201e-05 9.98201e-05 9.98201e-05 9.81769e-01 9.98201e-05 9.98201e-05
## 4 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05 9.98021e-05
## 5 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05 9.98018e-05
## 6 9.98016e-05 9.98016e-05 9.98016e-05 9.97804e-01 9.98016e-05 9.98016e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak23 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:23)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_23[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=23.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_23) +
expand_limits(y = c(0, 1.5))
## K = 21
## run 1 0.8678011
## run 2 0.8674450
## run 3 0.8686613
## run 4 0.8677728
## run 5 0.8673978
color_palette_21 <-
c(
"#FF8C1A",
"yellow2",
"#77DD77",
"chocolate4",
"#B22222",
"purple",
"#B20CC9",
"#F49AC2",
"blue",
"#1E90FF",
"purple4",
"#FFFF99",
"#75FAFF",
"#AE9393",
"magenta",
"green4",
"navy",
"green",
"#008080",
"goldenrod3",
"orangered2"
)
using ggplot
best = which.min(cross.entropy(project, K = 21))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 21, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:21)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_21[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_21)
Extract ancestry coefficients for k=21
change to correct matrix
leak21 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K21/run5/r2_0.01_b_r5.21.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak21)
## # A tibble: 6 × 21
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0275 0.0000999 0.0000999 4.04e-3 9.99e-5 1.79e-2 1.46e-3 3.42e-2 8.64e-2
## 2 0.0148 0.0115 0.0454 1.00e-4 1.00e-4 3.47e-4 5.23e-3 1.04e-2 2.87e-2
## 3 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## 4 0.0000999 0.0000999 0.0000999 9.99e-5 5.75e-2 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 5 0.00210 0.0000999 0.0000999 9.99e-5 3.13e-2 9.99e-5 9.99e-5 5.84e-3 9.99e-5
## 6 0.0000998 0.0000998 0.0000998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 12 more variables: X10 <dbl>, X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>,
## # X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 2.75462e-02 9.99460e-05 9.99460e-05 4.04442e-03 9.99460e-05
## 2 1002 OKI 1.48332e-02 1.14721e-02 4.54085e-02 9.99640e-05 9.99640e-05
## 3 1003 OKI 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 1004 OKI 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 5.74840e-02
## 5 1005 OKI 2.09787e-03 9.98830e-05 9.98830e-05 9.98830e-05 3.12814e-02
## 6 1006 OKI 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
## X6 X7 X8 X9 X10 X11
## 1 1.79090e-02 1.46433e-03 3.41894e-02 8.64120e-02 2.89535e-02 7.82764e-02
## 2 3.47153e-04 5.23083e-03 1.04371e-02 2.87482e-02 9.99640e-05 1.11851e-01
## 3 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 1.10122e-03 7.62143e-04
## 5 9.98830e-05 9.98830e-05 5.84309e-03 9.98830e-05 9.98830e-05 1.28451e-02
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
## X12 X13 X14 X15 X16 X17
## 1 9.99460e-05 0.478998 2.65349e-02 4.93407e-02 1.85118e-02 9.99460e-05
## 2 5.74222e-02 0.335978 1.95733e-02 6.52214e-02 6.26335e-02 2.71121e-04
## 3 9.98381e-05 0.975063 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 4.50163e-03 0.860375 9.98941e-05 9.98941e-05 9.98941e-05 8.72065e-03
## 5 9.60980e-03 0.890601 9.98830e-05 9.98830e-05 6.48132e-03 9.98830e-05
## 6 9.98199e-05 0.998004 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
## X18 X19 X20 X21
## 1 6.87504e-02 1.70532e-02 6.14159e-02 9.99460e-05
## 2 6.91112e-02 1.13466e-01 4.75945e-02 9.99640e-05
## 3 9.98381e-05 3.59147e-03 9.98381e-05 1.95484e-02
## 4 9.98941e-05 5.07547e-03 5.91446e-02 1.63704e-03
## 5 9.98830e-05 9.98830e-05 3.99420e-02 9.98830e-05
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
Rename the columns
# Rename the columns starting from the third one
leak21 <- leak21 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak21)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 2.75462e-02 9.99460e-05 9.99460e-05 4.04442e-03 9.99460e-05
## 2 1002 OKI 1.48332e-02 1.14721e-02 4.54085e-02 9.99640e-05 9.99640e-05
## 3 1003 OKI 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 1004 OKI 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 5.74840e-02
## 5 1005 OKI 2.09787e-03 9.98830e-05 9.98830e-05 9.98830e-05 3.12814e-02
## 6 1006 OKI 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
## v6 v7 v8 v9 v10 v11
## 1 1.79090e-02 1.46433e-03 3.41894e-02 8.64120e-02 2.89535e-02 7.82764e-02
## 2 3.47153e-04 5.23083e-03 1.04371e-02 2.87482e-02 9.99640e-05 1.11851e-01
## 3 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 9.98941e-05 9.98941e-05 9.98941e-05 9.98941e-05 1.10122e-03 7.62143e-04
## 5 9.98830e-05 9.98830e-05 5.84309e-03 9.98830e-05 9.98830e-05 1.28451e-02
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
## v12 v13 v14 v15 v16 v17
## 1 9.99460e-05 0.478998 2.65349e-02 4.93407e-02 1.85118e-02 9.99460e-05
## 2 5.74222e-02 0.335978 1.95733e-02 6.52214e-02 6.26335e-02 2.71121e-04
## 3 9.98381e-05 0.975063 9.98381e-05 9.98381e-05 9.98381e-05 9.98381e-05
## 4 4.50163e-03 0.860375 9.98941e-05 9.98941e-05 9.98941e-05 8.72065e-03
## 5 9.60980e-03 0.890601 9.98830e-05 9.98830e-05 6.48132e-03 9.98830e-05
## 6 9.98199e-05 0.998004 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
## v18 v19 v20 v21
## 1 6.87504e-02 1.70532e-02 6.14159e-02 9.99460e-05
## 2 6.91112e-02 1.13466e-01 4.75945e-02 9.99640e-05
## 3 9.98381e-05 3.59147e-03 9.98381e-05 1.95484e-02
## 4 9.98941e-05 5.07547e-03 5.91446e-02 1.63704e-03
## 5 9.98830e-05 9.98830e-05 3.99420e-02 9.98830e-05
## 6 9.98199e-05 9.98199e-05 9.98199e-05 9.98199e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak21 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:21)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_21[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=21.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_21) +
expand_limits(y = c(0, 1.5))
## K = 7
## run 1 0.8793121
## run 2 0.8800735
## run 3 0.8802178
## run 4 0.8798409
## run 5 0.8793035
using ggplot
best = which.min(cross.entropy(project, K = 7))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_7)
Extract ancestry coefficients for k=7
change to correct matrix
leak7 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K7/run5/r2_0.01_b_r5.7.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak7)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0756 0.0293 0.0751 0.0622 0.0574 0.0800 0.620
## 2 0.0512 0.0620 0.0632 0.0807 0.0627 0.125 0.556
## 3 0.000100 0.000100 0.0497 0.0150 0.0934 0.0266 0.815
## 4 0.0159 0.00257 0.0664 0.0291 0.0366 0.0927 0.757
## 5 0.000100 0.000100 0.0483 0.0153 0.0423 0.0944 0.800
## 6 0.000100 0.000100 0.0239 0.00847 0.0909 0.0414 0.835
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.075587600 0.029332800 0.0750965 0.06223380 0.0574401 0.0799922
## 2 1002 OKI 0.051196200 0.062006400 0.0631862 0.08066190 0.0626840 0.1246690
## 3 1003 OKI 0.000099982 0.000099982 0.0496697 0.01503730 0.0933872 0.0266181
## 4 1004 OKI 0.015876700 0.002571750 0.0663977 0.02908000 0.0366296 0.0927472
## 5 1005 OKI 0.000099982 0.000099982 0.0482861 0.01532970 0.0423157 0.0943542
## 6 1006 OKI 0.000099982 0.000099982 0.0239108 0.00847465 0.0909052 0.0413913
## X7
## 1 0.620317
## 2 0.555597
## 3 0.815088
## 4 0.756697
## 5 0.799514
## 6 0.835118
Rename the columns
# Rename the columns starting from the third one
leak7 <- leak7 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak7)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.075587600 0.029332800 0.0750965 0.06223380 0.0574401 0.0799922
## 2 1002 OKI 0.051196200 0.062006400 0.0631862 0.08066190 0.0626840 0.1246690
## 3 1003 OKI 0.000099982 0.000099982 0.0496697 0.01503730 0.0933872 0.0266181
## 4 1004 OKI 0.015876700 0.002571750 0.0663977 0.02908000 0.0366296 0.0927472
## 5 1005 OKI 0.000099982 0.000099982 0.0482861 0.01532970 0.0423157 0.0943542
## 6 1006 OKI 0.000099982 0.000099982 0.0239108 0.00847465 0.0909052 0.0413913
## v7
## 1 0.620317
## 2 0.555597
## 3 0.815088
## 4 0.756697
## 5 0.799514
## 6 0.835118
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak7 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_7) +
expand_limits(y = c(0, 1.5))
## K = 18
## run 1 0.8678589
## run 2 0.8684983
## run 3 0.8697050
## run 4 0.8691746
## run 5 0.8682401
color_palette_18 <-
c(
"#77DD77",
"yellow2",
"navy",
"#FFFF99",
"#AE9393",
"orangered2",
"#FF8C1A",
"chocolate4",
"#B22222",
"magenta",
"purple4",
"#1E90FF",
"purple",
"#008080",
"green",
"#75FAFF",
"blue",
"#F49AC2"
)
using ggplot
best = which.min(cross.entropy(project, K = 18))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 18, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:18)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_18[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_18)
Extract ancestry coefficients for k=18
change to correct matrix
leak18 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K18/run5/r2_0.01_b_r5.18.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak18)
## # A tibble: 6 × 18
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.07e-2 0.508 2.43e-2 1.00e-4 7.64e-3 1.97e-3 8.55e-3 1.03e-1 3.57e-2 8.77e-2
## 2 1.00e-4 0.320 6.73e-2 1.00e-4 1.56e-2 1.00e-4 1.00e-4 2.00e-2 9.73e-2 1.59e-1
## 3 7.75e-3 0.976 9.99e-5 7.81e-3 2.44e-3 9.99e-5 2.13e-3 9.99e-5 9.99e-5 9.99e-5
## 4 9.99e-5 0.854 1.58e-2 9.99e-5 9.99e-5 9.99e-5 9.99e-5 1.08e-2 4.80e-3 4.44e-2
## 5 2.30e-4 0.898 1.23e-2 9.99e-5 9.99e-5 2.88e-3 9.99e-5 9.99e-5 9.99e-5 2.98e-2
## 6 9.98e-5 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 8 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>, X17 <dbl>, X18 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 3.06744e-02 0.508398 2.42502e-02 9.99550e-05 7.63745e-03 1.96511e-03
## 2 1002 OKI 9.99550e-05 0.320112 6.72566e-02 9.99550e-05 1.56024e-02 9.99550e-05
## 3 1003 OKI 7.75016e-03 0.975955 9.98920e-05 7.80961e-03 2.44196e-03 9.98920e-05
## 4 1004 OKI 9.99280e-05 0.853619 1.57864e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1005 OKI 2.29525e-04 0.898306 1.23362e-02 9.99280e-05 9.99280e-05 2.87996e-03
## 6 1006 OKI 9.98468e-05 0.998303 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
## X7 X8 X9 X10 X11 X12
## 1 8.54832e-03 1.03097e-01 3.56559e-02 8.76786e-02 9.99550e-05 6.91777e-02
## 2 9.99550e-05 1.99509e-02 9.73101e-02 1.59343e-01 2.04739e-02 3.14025e-02
## 3 2.12764e-03 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 4 9.99280e-05 1.07794e-02 4.80383e-03 4.43832e-02 1.58909e-02 9.99280e-05
## 5 9.99280e-05 9.99280e-05 9.99280e-05 2.97674e-02 2.18784e-02 2.08629e-03
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
## X13 X14 X15 X16 X17 X18
## 1 3.88692e-02 9.99550e-05 9.99550e-05 9.99550e-05 5.42348e-02 2.93138e-02
## 2 4.06099e-02 9.99550e-05 3.87547e-02 1.12092e-01 1.60328e-02 6.05588e-02
## 3 9.98920e-05 9.98920e-05 9.98920e-05 2.71680e-03 9.98920e-05 9.98920e-05
## 4 9.99280e-05 8.33977e-03 2.62039e-02 9.99280e-05 1.30858e-02 6.30877e-03
## 5 9.99280e-05 9.99280e-05 1.43956e-02 9.99280e-05 3.06607e-03 1.42552e-02
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
Rename the columns
# Rename the columns starting from the third one
leak18 <- leak18 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak18)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 3.06744e-02 0.508398 2.42502e-02 9.99550e-05 7.63745e-03 1.96511e-03
## 2 1002 OKI 9.99550e-05 0.320112 6.72566e-02 9.99550e-05 1.56024e-02 9.99550e-05
## 3 1003 OKI 7.75016e-03 0.975955 9.98920e-05 7.80961e-03 2.44196e-03 9.98920e-05
## 4 1004 OKI 9.99280e-05 0.853619 1.57864e-02 9.99280e-05 9.99280e-05 9.99280e-05
## 5 1005 OKI 2.29525e-04 0.898306 1.23362e-02 9.99280e-05 9.99280e-05 2.87996e-03
## 6 1006 OKI 9.98468e-05 0.998303 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
## v7 v8 v9 v10 v11 v12
## 1 8.54832e-03 1.03097e-01 3.56559e-02 8.76786e-02 9.99550e-05 6.91777e-02
## 2 9.99550e-05 1.99509e-02 9.73101e-02 1.59343e-01 2.04739e-02 3.14025e-02
## 3 2.12764e-03 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05 9.98920e-05
## 4 9.99280e-05 1.07794e-02 4.80383e-03 4.43832e-02 1.58909e-02 9.99280e-05
## 5 9.99280e-05 9.99280e-05 9.99280e-05 2.97674e-02 2.18784e-02 2.08629e-03
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
## v13 v14 v15 v16 v17 v18
## 1 3.88692e-02 9.99550e-05 9.99550e-05 9.99550e-05 5.42348e-02 2.93138e-02
## 2 4.06099e-02 9.99550e-05 3.87547e-02 1.12092e-01 1.60328e-02 6.05588e-02
## 3 9.98920e-05 9.98920e-05 9.98920e-05 2.71680e-03 9.98920e-05 9.98920e-05
## 4 9.99280e-05 8.33977e-03 2.62039e-02 9.99280e-05 1.30858e-02 6.30877e-03
## 5 9.99280e-05 9.99280e-05 1.43956e-02 9.99280e-05 3.06607e-03 1.42552e-02
## 6 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05 9.98468e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak18 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:18)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_18[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=18.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_18) +
expand_limits(y = c(0, 1.5))
## K = 19
## run 1 0.8682022
## run 2 0.8680049
## run 3 0.8682069
## run 4 0.8680339
## run 5 0.8677646
color_palette_19 <-
c(
"#77DD77",
"yellow2",
"navy",
"#FFFF99",
"#AE9393",
"orangered2",
"#FF8C1A",
"chocolate4",
"#B22222",
"magenta",
"purple4",
"#1E90FF",
"purple",
"#008080",
"green",
"#75FAFF",
"blue",
"#F49AC2",
"#B20CC9"
)
best = which.min(cross.entropy(project, K = 19))
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 19, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:19)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_19[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=20.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_19)
Extract ancestry coefficients for k=19
change to correct matrix
leak19 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/snps_sets/r2_0.01_b.snmf/K19/run5/r2_0.01_b_r5.19.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak19)
## # A tibble: 6 × 19
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.64e-2 1.80e-2 0.496 1.00e-4 1.00e-4 7.31e-3 1.00e-4 1.02e-1 4.13e-2 6.34e-2
## 2 9.52e-2 2.62e-2 0.323 4.52e-2 1.95e-2 8.76e-2 1.00e-4 1.76e-1 1.00e-4 9.01e-3
## 3 9.99e-5 9.99e-3 0.982 9.99e-5 9.99e-5 1.39e-3 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4 7.49e-3 9.99e-5 0.833 7.65e-3 9.99e-5 9.99e-5 1.45e-2 3.37e-2 4.38e-2 2.15e-3
## 5 2.12e-2 9.99e-5 0.864 1.51e-2 4.03e-3 9.99e-5 9.99e-5 2.76e-2 1.20e-2 1.07e-2
## 6 9.98e-5 9.98e-5 0.998 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5 9.98e-5
## # ℹ 9 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>
The fam file
fam_file <- here("euro_global/output/snps_sets/r2_0.01_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 1.63748e-02 1.80108e-02 0.496327 9.99640e-05 9.99640e-05 7.31110e-03
## 2 1002 OKI 9.51566e-02 2.62419e-02 0.323274 4.51663e-02 1.95366e-02 8.76055e-02
## 3 1003 OKI 9.98650e-05 9.98848e-03 0.981676 9.98650e-05 9.98650e-05 1.38856e-03
## 4 1004 OKI 7.49281e-03 9.99010e-05 0.833186 7.65312e-03 9.99010e-05 9.99010e-05
## 5 1005 OKI 2.12344e-02 9.99190e-05 0.863652 1.51044e-02 4.02701e-03 9.99190e-05
## 6 1006 OKI 9.98378e-05 9.98378e-05 0.998203 9.98378e-05 9.98378e-05 9.98378e-05
## X7 X8 X9 X10 X11 X12
## 1 9.99640e-05 1.02275e-01 4.13298e-02 6.33793e-02 2.61333e-02 4.30549e-03
## 2 9.99550e-05 1.75763e-01 9.99550e-05 9.00764e-03 8.99474e-03 9.99550e-05
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 1.44714e-02 3.37349e-02 4.38463e-02 2.14673e-03 9.99010e-05 9.99010e-05
## 5 9.99190e-05 2.75958e-02 1.20219e-02 1.06768e-02 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
## X13 X14 X15 X16 X17 X18
## 1 9.99640e-05 8.17507e-02 4.37441e-02 9.30773e-03 1.32076e-02 6.30280e-02
## 2 6.60810e-03 5.91668e-02 3.68035e-02 9.99550e-05 4.74601e-02 5.87159e-02
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 5.63694e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 3.58518e-02 5.75777e-03 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
## X19
## 1 1.31156e-02
## 2 9.99550e-05
## 3 5.44933e-03
## 4 9.99010e-05
## 5 3.17852e-03
## 6 9.98378e-05
Rename the columns
# Rename the columns starting from the third one
leak19 <- leak19 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak19)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 1.63748e-02 1.80108e-02 0.496327 9.99640e-05 9.99640e-05 7.31110e-03
## 2 1002 OKI 9.51566e-02 2.62419e-02 0.323274 4.51663e-02 1.95366e-02 8.76055e-02
## 3 1003 OKI 9.98650e-05 9.98848e-03 0.981676 9.98650e-05 9.98650e-05 1.38856e-03
## 4 1004 OKI 7.49281e-03 9.99010e-05 0.833186 7.65312e-03 9.99010e-05 9.99010e-05
## 5 1005 OKI 2.12344e-02 9.99190e-05 0.863652 1.51044e-02 4.02701e-03 9.99190e-05
## 6 1006 OKI 9.98378e-05 9.98378e-05 0.998203 9.98378e-05 9.98378e-05 9.98378e-05
## v7 v8 v9 v10 v11 v12
## 1 9.99640e-05 1.02275e-01 4.13298e-02 6.33793e-02 2.61333e-02 4.30549e-03
## 2 9.99550e-05 1.75763e-01 9.99550e-05 9.00764e-03 8.99474e-03 9.99550e-05
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 1.44714e-02 3.37349e-02 4.38463e-02 2.14673e-03 9.99010e-05 9.99010e-05
## 5 9.99190e-05 2.75958e-02 1.20219e-02 1.06768e-02 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
## v13 v14 v15 v16 v17 v18
## 1 9.99640e-05 8.17507e-02 4.37441e-02 9.30773e-03 1.32076e-02 6.30280e-02
## 2 6.60810e-03 5.91668e-02 3.68035e-02 9.99550e-05 4.74601e-02 5.87159e-02
## 3 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05 9.98650e-05
## 4 5.63694e-02 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05 9.99010e-05
## 5 3.58518e-02 5.75777e-03 9.99190e-05 9.99190e-05 9.99190e-05 9.99190e-05
## 6 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05 9.98378e-05
## v19
## 1 1.31156e-02
## 2 9.99550e-05
## 3 5.44933e-03
## 4 9.99010e-05
## 5 3.17852e-03
## 6 9.98378e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak19 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:19)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_19[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=19.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_19) +
expand_limits(y = c(0, 1.5))
genotype <- here(
"euro_global/output/neuroadmixture/euro_native2_italy_all.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 339
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 339
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG
## 12 13 12 14 12 16 2 12 4 7 12 4 11 4 5 9 12 2 6 12
## KAN KAT KLP KUN LAM MAT OKI QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN
## 11 6 4 4 9 12 12 11 4 9 3 12 6 6 7 12 12 9
Convert format
##
## - number of detected individuals: 330
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.geno"
##
## - number of detected individuals: 330
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
##
##
## - number of detected individuals: 330
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.lfmm"
PCA for MAF 1% r2<0.01 snp set of euro_native_italy_all
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 330
## -L (number of loci) 22537
## -K (number of principal components) 330
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: euro_native2_italy_all.pca/
## input file: euro_native2_italy_all.lfmm
## eigenvalue file: euro_native2_italy_all.eigenvalues
## eigenvector file: euro_native2_italy_all.eigenvectors
## standard deviation file: euro_native2_italy_all.sdev
## projection file: euro_native2_italy_all.projections
## pcaProject file: euro_native2_italy_all.pcaProject
## number of individuals: 330
## number of loci: 22537
## number of principal components: 330
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 330
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.pca/euro_native2_italy_all.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year Region
## 1 Brescia Brescia 45.53373 10.204450 Europe BRE 1995 Italy
## 2 Cesena Cesena 44.15287 12.244265 Europe CES 1995 Italy
## 3 Desenzano Desenzano 45.46289 10.549140 Europe DES 1995 Italy
## 4 Bologna Bologna 44.48478 11.366584 Europe ITB 2017 Italy
## 5 Imperia Imperia 43.87159 8.003559 Europe IMP 2017 Italy
## 6 Puglia Puglia 41.12213 16.844107 Europe ITP 2016 Italy
## Subregion order order2 orderold
## 1 West Europe 20 12 12
## 2 West Europe 24 16 16
## 3 West Europe 21 13 13
## 4 West Europe 23 15 15
## 5 West Europe 18 10 10
## 6 West Europe 28 20 20
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 38 Levels: BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ... YUN
## [1] 38
Check the regions
## [1] "Italy" "East Asia" "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BEN -36.4286 -8.65616 9.67835 1.706060 0.242176 0.00620311 3.35510
## 2 BEN -35.6578 -7.99592 9.14433 3.316050 -2.987790 -0.63408700 4.14891
## 3 BEN -37.2921 -8.22387 8.54982 1.835040 -1.936330 -0.42146200 3.54288
## 4 BEN -35.6589 -8.20466 9.41346 2.407870 -1.585510 0.07330430 3.14577
## 5 BEN -36.3292 -8.46492 10.47310 0.864815 0.747834 -1.23031000 3.35605
## 6 BEN -35.8840 -8.60298 10.39800 1.301580 -2.034310 -0.08303760 2.72750
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 -3.01092 -8.93709 1.257590 -4.37216 9.57095 1.807150 3.99259 -2.8512300
## 2 -4.05844 -6.56452 3.101820 -6.56300 9.34943 1.495770 1.91960 -0.0789511
## 3 -2.41577 -8.58320 1.848370 -6.13380 10.95160 2.024820 3.09653 -0.2232520
## 4 -2.41373 -7.71873 3.074200 -3.69073 9.34427 2.835000 2.65460 -1.6481300
## 5 -3.19459 -7.71933 3.112940 -3.06045 11.42760 0.341763 4.35146 -1.2772800
## 6 -3.45130 -9.82680 0.593669 -5.16226 10.53380 1.694970 3.81288 -2.9277000
## PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 1.4269200 0.444874 -2.605780 -0.1387210 -0.548535 0.446162 0.679672
## 2 2.6657600 -0.459167 -0.675325 1.7449300 -0.219636 -0.583923 0.478380
## 3 -1.0003300 1.323620 -0.496820 0.4805230 -1.521700 -0.898755 1.032990
## 4 -0.0257458 -0.540294 -1.702020 -0.2572390 -1.113980 -0.932667 -1.856180
## 5 2.2095900 0.186720 0.709656 -1.4002400 -2.768490 2.423650 0.924281
## 6 1.5628700 0.690478 -1.002700 0.0920083 -3.523690 -3.039580 5.411600
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 1.20414 -3.448410 8.67336 10.20080 3.5956500 -2.87586 1.13300 5.674920
## 2 1.63656 -1.828880 6.22152 6.88458 0.9441300 -5.16572 3.27890 4.441010
## 3 -1.27430 -2.677440 4.36255 5.60435 1.0782800 -7.61513 1.98766 1.732170
## 4 -1.35891 -2.573890 4.74408 7.11259 -0.0799014 -2.26633 2.12922 2.664020
## 5 0.92488 0.106894 5.41177 7.92296 1.1685500 -1.75458 2.26918 0.815352
## 6 -3.99058 -6.468870 10.78720 11.60920 1.0326100 -8.80816 6.29450 6.784490
## PC31 PC32 PC33 PC34 PC35 PC36 PC37
## 1 -1.35344000 0.507352 -8.41585 0.829387 12.26600 11.85820 4.74150
## 2 -0.00149523 0.226129 -9.49899 1.133870 7.95940 2.20301 1.24861
## 3 -3.30131000 -0.940338 -5.27423 0.421175 4.99281 2.88495 5.57283
## 4 -0.17599900 1.379430 -5.63710 2.246720 3.13143 6.34839 4.92737
## 5 -0.13300000 0.541537 -2.29314 0.668216 6.68419 5.72174 6.01297
## 6 -5.08747000 -0.171277 -23.00240 -3.675780 16.02970 12.80410 16.76260
## PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 1 -9.306760 2.76689 -5.93442 -6.06762 6.25710 -3.59349000 -5.234970
## 2 0.942282 1.15206 -1.70704 -3.64219 4.40032 -4.88358000 -0.983047
## 3 -2.730500 1.48026 -2.75150 -3.23859 7.02737 -0.00871823 1.792620
## 4 -2.748640 4.86364 -3.10931 -3.63340 4.08623 -0.34372400 1.801030
## 5 -2.573450 -2.40112 -1.12456 -5.43754 4.60885 -2.15457000 -0.110867
## 6 -1.843590 3.79675 -16.48130 -4.03469 18.38850 3.29094000 12.381400
## PC45 PC46 PC47 PC48 PC49 PC50 PC51 PC52
## 1 4.988650 -4.982460 -1.85080 -1.710760 3.93953 -1.71044 -0.485108 -5.07246
## 2 3.155360 1.187990 1.96649 -2.717490 3.74892 -3.07740 3.763990 1.96752
## 3 4.232960 -0.399819 2.01636 -3.436120 -2.23064 4.40633 3.653800 -1.67868
## 4 4.671640 1.507510 2.25192 -1.914680 1.59632 3.10852 1.710710 -2.80248
## 5 0.368042 -3.879630 1.59489 0.537028 3.50036 -1.82312 3.326420 -1.81966
## 6 14.391200 9.710800 1.61274 -9.103410 2.49952 -5.82170 3.909670 -6.41516
## PC53 PC54 PC55 PC56 PC57 PC58 PC59
## 1 -3.498460 -0.591602 -3.4994500 -4.82154 -1.810640 -17.726900 -9.26768
## 2 -0.433642 -1.930470 -0.6457280 -2.88981 0.837444 -4.499700 -2.36933
## 3 1.006650 1.890690 -2.8024000 1.05884 1.743710 -3.537010 -3.22576
## 4 -1.712400 0.347678 -2.8923900 -1.44836 9.173920 -3.389600 -2.25018
## 5 1.983130 -1.238630 0.0951803 -2.85376 -0.779215 -0.808632 2.27708
## 6 -5.677470 5.034360 5.4366300 3.93041 3.065970 18.928700 7.49059
## PC60 PC61 PC62 PC63 PC64 PC65 PC66 PC67
## 1 -20.61790 12.84320 14.67310 -3.457630 -10.14490 -1.50620 0.232364 4.12503
## 2 -7.87076 5.24957 7.11435 -2.593260 -3.41792 1.14602 1.396230 -2.10571
## 3 -5.54479 3.61423 7.51875 4.003790 -1.75662 3.00994 5.822280 -4.10763
## 4 -8.13063 -0.47465 5.59817 -1.154140 -7.53385 1.28329 3.698950 -3.89698
## 5 -6.65000 7.30773 11.04180 -1.247250 -1.33184 -3.02103 -1.397850 -3.99500
## 6 35.27880 -8.97710 -21.74100 -0.392993 16.62620 -6.04496 -1.847560 4.15223
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 1.536300 -13.48370 3.477350 3.720790 -5.916960 -1.536060 21.41410
## 2 0.410133 4.71199 -3.481590 4.972220 -1.976500 1.742720 -4.75006
## 3 4.377580 1.44084 0.462429 -4.110600 3.893530 0.941228 -7.14890
## 4 -3.201360 -2.81169 1.883300 -4.296660 8.613790 -4.352930 -8.34767
## 5 -8.322840 3.09596 -4.858230 0.499918 -6.630700 -6.729030 -8.89989
## 6 5.775870 8.48122 3.804050 0.434860 -0.890947 3.822750 5.00241
## PC75 PC76 PC77 PC78 PC79 PC80 PC81 PC82
## 1 4.816550 13.458700 13.34870 12.188700 -4.819460 -11.57360 6.17078 13.476400
## 2 1.530660 -5.740000 -1.32580 -0.241753 -0.534579 1.42928 -6.28200 -4.914640
## 3 -0.446741 0.993584 -2.48911 -7.698270 5.382240 4.70027 -1.42030 0.187968
## 4 1.683970 -2.658500 -4.73852 -7.112860 2.012310 7.32083 -1.56853 -7.722160
## 5 0.992625 -3.933490 -2.05930 -0.953750 -1.942150 -4.07214 0.95736 -2.942890
## 6 0.370085 -2.142860 7.26191 1.907220 -6.636400 -1.64103 5.30904 5.049200
## PC83 PC84 PC85 PC86 PC87 PC88 PC89 PC90
## 1 7.76193 -7.810620 17.38790 -4.99597 7.74147 -1.481580 0.176349 -0.501481
## 2 -3.09758 -2.063730 -2.48145 -3.63315 1.76304 -7.050320 -1.247400 9.195970
## 3 -2.73932 0.486628 -8.22308 0.24716 -5.48897 0.330582 1.724920 -6.792810
## 4 1.13173 -5.309550 -5.73748 -4.96979 -6.28995 1.659020 -9.198980 -5.984870
## 5 -3.89876 -0.117790 -13.50760 4.70942 -4.93501 -2.630050 4.784760 5.307010
## 6 3.58915 -2.854030 3.82292 5.45977 -4.10345 4.272530 1.657460 -3.883730
## PC91 PC92 PC93 PC94 PC95 PC96 PC97
## 1 4.202590 4.965650 -7.4087200 -6.208640 1.590710 2.09937 -5.15330
## 2 -14.704200 -5.108240 7.1066500 1.663840 1.899690 1.74474 2.87462
## 3 4.442330 -0.533804 3.6846500 -0.154139 -12.909800 7.37524 -7.33184
## 4 0.503548 2.005400 6.5935300 4.694260 -2.200410 -5.88301 -1.10462
## 5 -11.029100 3.502840 11.1816000 4.877440 -0.128761 2.21194 15.18890
## 6 -0.361817 -3.177760 0.0502092 -0.616075 -0.968541 1.85617 -5.25075
## PC98 PC99 PC100 PC101 PC102 PC103 PC104
## 1 -2.76715 4.668910 -6.09971 -2.169200 6.270300 3.977710 -6.981530
## 2 -3.58197 -7.732730 8.38126 -0.937371 -4.556590 -3.814630 -1.967820
## 3 3.53662 -6.219700 4.27379 4.082890 1.894790 0.424252 -4.777210
## 4 3.08432 0.489263 2.99202 1.501650 0.424027 6.592460 8.391140
## 5 4.53260 9.150640 -3.82560 -6.886630 1.589150 -0.669231 3.295550
## 6 0.41695 2.985840 1.25128 -1.136980 1.542230 -0.606253 0.763501
## PC105 PC106 PC107 PC108 PC109 PC110 PC111
## 1 -2.15452000 4.523020 3.80938 1.531350 0.702757 4.135630 -0.273608
## 2 6.43562000 -2.728800 5.92438 -2.866840 -5.487690 -3.161320 -2.511560
## 3 5.57791000 -0.992583 -2.43924 -11.109900 4.475740 6.632170 -6.241980
## 4 3.08297000 4.307370 4.92973 -3.010530 -7.851450 4.580160 5.154650
## 5 0.00101321 2.321840 -11.48910 6.417860 5.643610 -0.598633 -5.881490
## 6 -1.09509000 -0.248610 1.51874 0.343609 1.231330 -2.490520 1.721730
## PC112 PC113 PC114 PC115 PC116 PC117 PC118
## 1 1.94491 -2.29686 -0.308073 -0.999831 4.083370 -0.293493 0.883767
## 2 -3.36833 -6.05028 6.710480 6.181630 3.554460 -5.974590 5.684740
## 3 -4.87052 -3.96529 -5.388560 6.287600 -1.984700 4.083740 -9.429340
## 4 -1.91764 -2.23213 -11.369400 -6.917750 -8.183640 2.695530 7.313880
## 5 6.28290 -2.58825 -6.088260 -0.679319 6.029180 4.263300 -12.417700
## 6 -2.00787 -1.40786 2.166730 -1.347610 -0.300582 -1.301520 1.308540
## PC119 PC120 PC121 PC122 PC123 PC124 PC125
## 1 4.221450 -1.564040 7.911250 -2.61741 -3.79594 7.53077 1.673740
## 2 -15.715200 1.550870 -6.573890 1.83892 10.59710 -11.08400 -3.380720
## 3 -12.139900 0.353241 -14.197300 1.51723 -8.93805 -9.65874 7.466230
## 4 -9.708120 7.559850 -6.511010 1.86800 1.61614 3.80643 -6.509970
## 5 1.590080 -14.323000 6.601470 -6.67113 12.43840 -1.01275 -1.038080
## 6 -0.409233 -3.669070 -0.784169 -1.82787 2.42521 1.32555 -0.236803
## PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 1 -0.791141 -3.44300 -3.594480 1.8163600 -2.23310 -3.75804 -3.43904
## 2 -3.964910 -3.31432 4.945450 -8.2090100 -6.61420 0.31984 -3.09043
## 3 9.292280 7.25150 6.932310 2.8002200 -3.07084 -6.12911 17.56150
## 4 -5.322430 3.51687 5.632890 -14.4926000 3.51536 -10.35130 -3.93269
## 5 -1.520330 20.49690 6.809590 -0.0800215 11.47930 -4.28857 1.40240
## 6 3.855290 -3.52590 0.411502 1.3495800 -1.18242 -1.39559 -1.16085
## PC133 PC134 PC135 PC136 PC137 PC138 PC139
## 1 1.2963300 -4.393090 0.8236190 -2.097590 -2.814670 -1.8934500 -0.74115
## 2 8.7048000 9.048130 10.3210000 -6.284710 6.913350 4.0879700 2.96463
## 3 -6.0236500 -3.814580 -10.3348000 4.016090 1.714060 1.6771600 5.07434
## 4 2.8089000 -0.546552 -0.5071200 8.565720 5.536310 -0.8850900 -3.03628
## 5 5.9091100 -2.025550 4.4254400 9.944990 -5.560400 0.0514701 5.69492
## 6 0.0403684 -1.547740 -0.0126975 -0.575174 -0.313609 1.3002400 1.27595
## PC140 PC141 PC142 PC143 PC144 PC145 PC146
## 1 0.297538 2.547830 -3.55896 -1.595990 1.91919 -0.110257 -0.402857
## 2 0.729823 -4.909300 4.44496 -1.743860 -5.16318 7.591420 14.467200
## 3 -1.128200 -8.056110 -11.82470 -2.224600 2.26824 -21.952000 7.497620
## 4 0.759772 0.230179 11.58190 -2.991780 -19.28690 8.705040 5.417540
## 5 -4.363940 16.202100 8.03622 -0.708072 4.29022 2.965250 -2.379100
## 6 -0.706910 -1.405860 -1.75542 -1.811070 -2.69975 2.217180 -0.493198
## PC147 PC148 PC149 PC150 PC151 PC152 PC153
## 1 -4.608490 3.688120 -0.43062 4.909980 -3.08522 0.601014 1.46085
## 2 0.877875 -6.242550 8.06753 -1.874390 2.12882 1.423480 -18.59620
## 3 -1.860260 -5.346860 -2.48336 1.330810 2.90265 -11.719100 -5.26154
## 4 -17.799200 -16.861000 3.81050 -0.053360 -4.62369 10.370900 -6.28892
## 5 8.841900 3.743100 3.11020 4.248280 6.69694 6.445800 -2.15495
## 6 -1.695370 0.791003 -1.15501 -0.141668 2.48447 -0.237788 -1.29166
## PC154 PC155 PC156 PC157 PC158 PC159 PC160
## 1 5.722670 -0.423216 4.201310 0.244095 -0.805949 -3.866640 5.424690
## 2 -0.625435 6.389990 14.936300 9.715080 13.279200 -8.555310 5.422110
## 3 -1.594170 10.364500 -1.123510 3.663520 11.720100 7.211250 -0.407956
## 4 2.757070 -8.743340 -6.234360 -4.000290 -2.407580 -1.000630 -4.656120
## 5 -25.056900 7.993380 -4.332630 -3.959700 -8.405300 0.794097 -7.704390
## 6 1.154230 0.163830 0.471986 -1.995310 -2.014020 -0.260112 -1.599450
## PC161 PC162 PC163 PC164 PC165 PC166 PC167
## 1 1.6498700 -1.359840 3.79428 -3.530000 -0.853487 -0.310305 -2.290050
## 2 -8.8495800 -11.654300 9.08494 -2.520500 -0.130562 5.301890 -0.270629
## 3 4.0321500 11.996100 7.98986 12.319600 2.272150 -9.494760 6.472930
## 4 0.0514306 2.547430 5.02832 4.478120 -6.070790 -2.540390 10.593900
## 5 -1.0143100 -15.175500 3.70779 0.459311 1.854200 5.176410 9.544460
## 6 -1.7903600 0.202701 1.25694 -2.376680 -2.890320 -1.451030 1.468570
## PC168 PC169 PC170 PC171 PC172 PC173 PC174
## 1 0.465481 2.676640 -1.601130 2.96868 -2.62084 -2.58270 -0.100016
## 2 -7.369730 1.328990 -5.188110 -15.25570 -5.76728 -16.53320 12.060300
## 3 15.124000 -9.525180 -1.132470 -5.85826 -2.58493 2.55936 9.871930
## 4 -2.576120 3.037520 21.085400 15.68240 -10.61770 13.02520 -9.889990
## 5 9.896820 4.577390 1.009540 10.29290 7.38586 -6.12128 7.591270
## 6 2.176990 0.565257 0.912449 2.95360 2.82096 -4.52211 -0.596217
## PC175 PC176 PC177 PC178 PC179 PC180 PC181
## 1 2.871010 -1.21489 0.508154 2.07517000 1.89598 -3.65197 -3.6299200
## 2 -0.476276 -1.65314 -4.676340 2.23210000 -7.27866 1.14088 -26.2318000
## 3 1.669020 4.47738 0.889116 -0.45733200 17.97750 1.53834 -6.5801600
## 4 -4.072420 -4.76307 -5.215450 7.32140000 2.34473 -5.22767 6.1979100
## 5 2.659320 4.42937 3.850640 -10.24460000 4.89775 9.28590 0.0956869
## 6 -0.773518 -1.25914 0.271291 0.00525602 -2.64764 -2.14407 -0.3952680
## PC182 PC183 PC184 PC185 PC186 PC187 PC188
## 1 1.24389 -3.302960 2.876120 -0.0440699 -3.30869 -1.603810 -0.268286
## 2 -18.35310 5.345250 -14.691200 -5.6640200 21.39570 6.865030 -14.165900
## 3 7.09082 -11.004800 13.481800 -20.5337000 2.79004 -4.536070 7.636240
## 4 9.04403 1.040480 13.061500 -4.2756500 2.94655 -1.229350 1.210330
## 5 -1.91249 -5.819030 -5.315890 1.6723300 4.47765 -8.252340 4.571220
## 6 1.94834 0.292433 0.871957 -0.8564560 -3.19218 0.221138 1.095620
## PC189 PC190 PC191 PC192 PC193 PC194 PC195
## 1 1.728610 0.634863 1.204040 -1.31276 -1.80258 1.567020 2.792490
## 2 0.945586 -1.555840 12.854200 -2.89874 -1.68527 0.112624 -13.116200
## 3 2.300320 -8.008130 6.444110 -8.98300 -13.88470 -12.383800 -0.113585
## 4 12.428800 -4.788980 -4.447500 15.73080 -5.70527 -5.846530 9.306030
## 5 -5.985770 1.879450 1.476470 -11.62790 -6.21661 -3.229850 7.531100
## 6 0.714980 0.681986 0.153076 -1.70962 -1.54565 1.120300 -1.223970
## PC196 PC197 PC198 PC199 PC200 PC201 PC202
## 1 -0.864158 -3.024560 3.592490 4.877320 1.03075 1.94791 -3.162600
## 2 7.083080 -10.919300 -0.240066 6.057200 9.91704 6.60590 -7.991950
## 3 -10.615000 -4.579790 0.957756 15.552400 1.77143 -4.19507 1.525850
## 4 23.423100 16.718100 -2.831820 -3.034520 6.46233 8.18282 -6.906330
## 5 1.602640 18.738600 4.732080 16.892400 -17.94380 7.59291 0.570049
## 6 -0.846690 0.846655 -1.244570 -0.327022 2.74285 -1.14568 -0.485728
## PC203 PC204 PC205 PC206 PC207 PC208 PC209
## 1 1.43958 -1.518410 1.95459 5.656330 -3.165260 2.09702 0.102735
## 2 3.68832 2.636990 -3.75071 -6.810350 13.593500 -7.44244 5.649990
## 3 -6.89536 12.060000 -8.39451 -7.701610 -10.363600 2.11047 -9.061770
## 4 2.90130 -13.414600 9.13907 -0.593586 -3.380060 -17.08950 -6.544610
## 5 -5.30091 8.201080 4.50963 21.209000 3.200770 -4.36198 10.915200
## 6 -4.64714 -0.358261 1.51279 2.381280 -0.843715 1.27899 0.294638
## PC210 PC211 PC212 PC213 PC214 PC215 PC216
## 1 -3.793070 3.757620 -1.913980 0.106880 4.64054 2.641460 1.057750
## 2 7.903730 -2.631820 0.925983 5.117520 -5.37771 2.993490 -4.576110
## 3 0.889264 0.122679 6.341800 -7.023340 5.47278 -15.066000 4.825350
## 4 1.718790 1.535190 -3.465960 -0.388398 -7.30977 5.921650 -0.304196
## 5 -2.290030 9.417030 3.900610 5.752560 -1.82579 0.352211 -3.288130
## 6 -0.507667 0.493733 -0.578154 3.186230 -2.51075 3.437370 0.422021
## PC217 PC218 PC219 PC220 PC221 PC222 PC223
## 1 -4.23964 5.261560 0.346457 1.696060 6.609610 -0.918140 -0.652525
## 2 -1.25051 -0.712649 -9.361090 1.373260 0.941486 -0.679007 -2.396680
## 3 1.89201 -2.000300 6.197500 -8.483430 3.722360 -4.141880 -5.493690
## 4 2.09185 2.883410 -8.287690 -4.182200 7.438670 0.968637 -7.885660
## 5 3.60629 2.308410 -6.502360 -1.966790 -2.588830 3.208160 7.530030
## 6 2.21642 1.201100 -0.757425 -0.203642 -2.354220 -0.330617 0.172852
## PC224 PC225 PC226 PC227 PC228 PC229 PC230
## 1 1.440030 0.375069 5.368990 -1.62975 -3.882370 2.55118 1.465770
## 2 1.158920 -1.649830 -3.525890 -1.56033 -0.939742 7.10125 -1.143480
## 3 -2.214400 -7.629850 -0.684586 -3.27709 4.970070 -3.72615 2.744160
## 4 1.612450 -1.170500 -4.701970 -2.02507 1.843920 -10.07330 0.651436
## 5 -2.161240 -2.060200 8.686100 5.51270 -7.836430 -3.78831 -0.415721
## 6 -0.940466 -1.301510 1.586080 1.95321 -0.276826 1.92731 -1.083310
## PC231 PC232 PC233 PC234 PC235 PC236 PC237
## 1 0.838979 -1.215150 -7.4422700 3.0388900 -2.2598200 5.87503 -6.326390
## 2 2.612370 -2.504560 -4.6264300 -0.3901230 -0.0653116 -1.64849 -1.762290
## 3 5.116110 -1.214120 -0.0121335 -2.9612000 2.7680300 -6.01301 2.921010
## 4 2.639330 -2.288750 -2.8794700 0.0593082 3.0394700 5.45513 -2.164020
## 5 -2.224360 3.966060 1.4207800 0.0534178 7.3592100 5.60633 -1.402640
## 6 -1.261690 -0.750792 -3.0197900 -0.8436020 -0.3856480 -1.27118 0.216776
## PC238 PC239 PC240 PC241 PC242 PC243 PC244
## 1 -2.972190 1.0442700 -0.253313 2.691210 1.332100 1.695790 -2.708640
## 2 -0.448896 1.0560500 4.273530 -1.223080 1.813660 -1.445580 0.299057
## 3 -2.602350 0.1641640 0.369107 1.247710 -2.398930 2.369080 -2.779110
## 4 -5.313580 2.2517800 -1.726960 -4.699740 0.200211 0.829226 -1.180550
## 5 1.251510 4.5528700 -1.537240 -4.584990 -0.205014 -1.850620 3.340830
## 6 2.936500 0.0154952 -1.353780 0.579329 -0.630658 1.092310 -3.475130
## PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 8.148420 -4.276080 6.894890 -3.070850 -0.685647 -5.314940 13.460900
## 2 2.541220 0.374377 -1.145630 -2.622020 -0.538264 0.246818 0.918522
## 3 -1.185390 0.927747 1.294350 0.220313 -1.187620 1.801910 1.660520
## 4 0.761870 -2.820680 -2.012240 -2.364380 3.156110 1.353790 -1.896550
## 5 0.379745 2.580200 2.436040 1.724550 2.283230 -2.067250 0.392675
## 6 1.345160 1.201260 -0.564827 1.243670 0.439854 1.456870 -2.140350
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 4.3786100 37.204600 -0.211792 -0.8296000 1.4709100 -13.7114000 10.404900
## 2 -0.2045570 -0.799595 -1.083990 0.0504868 0.3605240 -0.0786186 -1.371010
## 3 -0.0404249 3.268790 -1.071930 1.0412600 -0.8145450 -3.2581000 0.266288
## 4 -1.4496500 -0.126296 -1.365250 0.7934680 0.0579994 1.6465400 1.110700
## 5 -1.3385000 -1.459740 0.377419 -3.8941000 1.2156900 0.3805290 1.205490
## 6 0.9125970 -1.630110 -1.433200 -1.9005100 1.5761100 0.3080300 -0.951571
## PC259 PC260 PC261 PC262 PC263 PC264 PC265
## 1 8.246700 10.169900 12.3661000 10.551500 -6.5520300 -8.451820 -4.58806000
## 2 0.156113 0.078879 1.2198500 2.337660 -1.1434200 1.553940 -1.84909000
## 3 -3.401480 -0.537942 -2.6529100 0.552492 -0.5463700 -1.159910 0.68678400
## 4 0.848868 1.616660 -0.0403552 0.984601 -1.4430300 -1.720050 4.14152000
## 5 0.723288 -1.525150 0.1171330 -1.025430 0.0814161 1.786080 -0.00378826
## 6 1.005760 -1.625370 0.3543250 -0.717980 -0.2991210 0.335407 -0.70056100
## PC266 PC267 PC268 PC269 PC270 PC271 PC272
## 1 5.240320 0.84292000 -3.42696000 1.375530 0.2347510 -2.084360 0.8850150
## 2 -2.147090 0.00202997 0.21067500 1.608430 -0.9286760 -0.593916 0.6440190
## 3 -0.852395 -2.70650000 -2.68938000 1.477680 -0.1594480 -3.127140 0.2016290
## 4 -1.550920 2.73145000 -1.88184000 1.107690 0.0595403 -2.294110 -1.3184000
## 5 1.408520 -1.59335000 -0.00707087 0.581427 -0.0469112 -0.383952 0.0653132
## 6 0.791272 0.98820300 -1.80654000 -0.446946 -1.6573100 2.303220 1.4251300
## PC273 PC274 PC275 PC276 PC277 PC278 PC279
## 1 -7.066590 -3.831980 -2.041340 1.9109100 0.235371 -1.683040 0.593002
## 2 -1.887220 0.734744 0.478119 2.5843000 -0.726244 0.561962 -1.562550
## 3 0.972553 -0.362317 0.981879 1.6486200 2.052890 -0.744881 -0.248530
## 4 2.048610 -1.325460 -0.751355 -0.0172412 -0.121998 -0.577015 0.910234
## 5 1.610640 -3.079070 0.443442 -0.8247770 0.678580 -0.104648 -1.296460
## 6 -3.027620 0.441175 -5.661570 -1.9336500 0.394137 -1.752080 -8.592760
## PC280 PC281 PC282 PC283 PC284 PC285 PC286
## 1 3.294350 4.928870 0.22167 -1.839910 2.852200 -1.03176000 3.605580
## 2 -0.281637 0.499033 0.72375 0.540829 0.721245 -0.00990029 0.104565
## 3 0.812689 0.743964 -1.19117 -0.213697 -0.483257 -0.06532060 -0.756868
## 4 0.643373 1.707770 -1.24763 1.911780 0.774342 0.12405300 0.150799
## 5 1.410500 0.355993 0.48977 -0.415595 -1.320460 -0.58338300 0.178719
## 6 -3.012900 5.596500 4.84887 -6.007840 2.409990 2.69619000 9.282960
## PC287 PC288 PC289 PC290 PC291 PC292 PC293
## 1 0.750799 2.481190 -3.905700 -0.936461 0.5486000 2.6105100 -0.303720
## 2 0.214913 -1.469200 -0.289237 0.530904 0.9807980 -1.8608200 -0.260329
## 3 1.258940 -0.459047 0.236495 0.219122 0.0451856 -0.2709470 -0.144081
## 4 -0.602306 2.346140 0.246626 -0.666626 -1.0121300 -0.0633217 -0.455679
## 5 0.396745 0.575871 -0.809585 -0.373673 0.1655020 -0.5395460 0.786351
## 6 3.104320 -2.620390 -8.282950 32.762800 -26.0616000 5.9327800 5.437010
## PC294 PC295 PC296 PC297 PC298 PC299 PC300
## 1 -0.798181 0.478913 -1.2460100 0.796365 0.0845726 -1.4198000 0.172801
## 2 -0.493476 0.900055 -0.1344480 0.397938 1.0070100 0.9602260 -0.230228
## 3 -0.829300 -0.121166 1.1496600 -0.958856 0.8517270 -0.5905860 -0.110834
## 4 -0.840823 -0.674758 -0.0459401 0.461341 0.4481720 -0.0601338 0.250727
## 5 0.448526 -0.321390 -0.3594780 -0.674036 -0.0334604 0.7397040 0.590811
## 6 -5.968770 2.589400 -1.6682400 3.948190 -3.2865500 0.6164350 1.712460
## PC301 PC302 PC303 PC304 PC305 PC306 PC307
## 1 -0.02857240 0.784914 -0.111594 0.299842 0.1537890 0.257618 0.581506
## 2 0.00664791 -0.299704 -0.365714 0.161070 0.0510703 0.662804 1.036940
## 3 0.48116000 0.622439 -0.207147 0.375918 0.3409570 -0.491765 0.212827
## 4 -0.51127000 0.220587 0.334186 -0.752543 -0.0686399 -0.247551 0.710792
## 5 -0.08005430 0.672211 0.203599 0.177536 0.6002280 -0.438392 -0.123792
## 6 -1.55321000 -2.020450 -1.418340 -0.912601 0.8920440 0.499495 0.719343
## PC308 PC309 PC310 PC311 PC312 PC313 PC314
## 1 -0.912519 0.131409 0.297420 0.590261 -1.487690 -0.358880 0.11508000
## 2 -0.191389 -0.217103 0.644524 0.341326 -0.309193 -0.240516 0.12306700
## 3 0.656745 0.814825 -0.873942 0.326364 -0.447306 0.226318 0.10180300
## 4 -0.274965 0.646337 -0.385686 0.343867 0.224897 0.442280 -1.00193000
## 5 0.165050 -1.296820 0.112506 -0.108202 0.387461 -0.358981 -0.00209422
## 6 -1.782870 1.209840 -2.716690 -0.935250 0.423990 0.991128 -1.04000000
## PC315 PC316 PC317 PC318 PC319 PC320 PC321
## 1 -0.1361810 0.3707860 0.516543 -0.2106360 0.0629784 -0.2354070 0.0745866
## 2 0.2838960 0.0905737 0.497427 0.0790915 -0.1036950 0.0608285 0.0557892
## 3 0.2841800 0.4468840 0.175940 -0.2585500 -0.4107850 0.0991483 0.1526120
## 4 0.4180580 -0.3414670 0.120261 0.0687079 -0.3069000 0.1089410 0.2863580
## 5 0.1452500 -0.5249250 0.201887 0.2170580 0.4764040 -0.0473087 0.2351270
## 6 0.0775644 0.1263700 -0.248673 -0.3149170 0.7343540 0.4174750 0.6310480
## PC322 PC323 PC324 PC325 PC326 PC327
## 1 -0.26463200 -0.0522274 0.0492131 -0.4739130 -0.1011950 -0.02837090
## 2 -0.14297300 0.0105028 0.2550780 0.2915880 -0.1465930 -0.06746470
## 3 -0.27335000 0.1458950 0.6288990 0.1844770 0.0152865 0.10312400
## 4 -0.00865434 0.4698300 0.3435860 -0.1014130 0.1906390 0.00470319
## 5 0.19431100 0.0761079 0.0427247 0.0456039 0.3292730 0.01840060
## 6 -0.04306320 -0.1040430 -0.3736180 0.2089580 -0.2925900 -0.38677800
## PC328 PC329 PC330 Individual Pop_City Location Latitude
## 1 -0.06080890 -0.2694990 -1.75322e-06 255 Bengaluru India 12.9716
## 2 0.09760780 -0.2002560 -1.75322e-06 261 Bengaluru India 12.9716
## 3 -0.00548772 -0.0353852 -1.75322e-06 256 Bengaluru India 12.9716
## 4 0.02166250 0.0259966 -1.75322e-06 258 Bengaluru India 12.9716
## 5 -0.20685700 -0.0353230 -1.75322e-06 265 Bengaluru India 12.9716
## 6 -0.00352486 0.0335714 -1.75322e-06 262 Bengaluru India 12.9716
## Longitude Continent Year Region Subregion order order2 orderold
## 1 77.5946 Asia Unknown South Asia 60 52 52
## 2 77.5946 Asia Unknown South Asia 60 52 52
## 3 77.5946 Asia Unknown South Asia 60 52 52
## 4 77.5946 Asia Unknown South Asia 60 52 52
## 5 77.5946 Asia Unknown South Asia 60 52 52
## 6 77.5946 Asia Unknown South Asia 60 52 52
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1","PCA_lea_native_italy_all_pc1_pc2_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 and PC3
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "PCA_lea_native_italy_all_pc1_pc3_b.pdf"
),
width = 8,
height = 6,
units = "in"
)
Repeat, but separate out 1990s Italian pops into own region
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global","lea", "MAF_1", "sampling_loc_italy_native_temporal.rds"
))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native_temporal.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_native_temporal.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Brescia Brescia 45.53373 10.204450 Europe BRE 1995
## 2 Cesena Cesena 44.15287 12.244265 Europe CES 1995
## 3 Desenzano Desenzano 45.46289 10.549140 Europe DES 1995
## 4 Bologna Bologna 44.48478 11.366584 Europe ITB 2017
## 5 Imperia Imperia 43.87159 8.003559 Europe IMP 2017
## 6 Puglia Puglia 41.12213 16.844107 Europe ITP 2016
## Region Subregion order order2 orderold
## 1 Italy (1995) West Europe 20 12 12
## 2 Italy (1995) West Europe 24 16 16
## 3 Italy (1995) West Europe 21 13 13
## 4 Italy (modern) West Europe 23 15 15
## 5 Italy (modern) West Europe 18 10 10
## 6 Italy (modern) West Europe 28 20 20
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 38 Levels: BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ... YUN
## [1] 38
Check the regions
## [1] "Italy (1995)" "Italy (modern)" "East Asia" "South Asia"
## [5] "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BEN -36.4286 -8.65616 9.67835 1.706060 0.242176 0.00620311 3.35510
## 2 BEN -35.6578 -7.99592 9.14433 3.316050 -2.987790 -0.63408700 4.14891
## 3 BEN -37.2921 -8.22387 8.54982 1.835040 -1.936330 -0.42146200 3.54288
## 4 BEN -35.6589 -8.20466 9.41346 2.407870 -1.585510 0.07330430 3.14577
## 5 BEN -36.3292 -8.46492 10.47310 0.864815 0.747834 -1.23031000 3.35605
## 6 BEN -35.8840 -8.60298 10.39800 1.301580 -2.034310 -0.08303760 2.72750
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 -3.01092 -8.93709 1.257590 -4.37216 9.57095 1.807150 3.99259 -2.8512300
## 2 -4.05844 -6.56452 3.101820 -6.56300 9.34943 1.495770 1.91960 -0.0789511
## 3 -2.41577 -8.58320 1.848370 -6.13380 10.95160 2.024820 3.09653 -0.2232520
## 4 -2.41373 -7.71873 3.074200 -3.69073 9.34427 2.835000 2.65460 -1.6481300
## 5 -3.19459 -7.71933 3.112940 -3.06045 11.42760 0.341763 4.35146 -1.2772800
## 6 -3.45130 -9.82680 0.593669 -5.16226 10.53380 1.694970 3.81288 -2.9277000
## PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 1.4269200 0.444874 -2.605780 -0.1387210 -0.548535 0.446162 0.679672
## 2 2.6657600 -0.459167 -0.675325 1.7449300 -0.219636 -0.583923 0.478380
## 3 -1.0003300 1.323620 -0.496820 0.4805230 -1.521700 -0.898755 1.032990
## 4 -0.0257458 -0.540294 -1.702020 -0.2572390 -1.113980 -0.932667 -1.856180
## 5 2.2095900 0.186720 0.709656 -1.4002400 -2.768490 2.423650 0.924281
## 6 1.5628700 0.690478 -1.002700 0.0920083 -3.523690 -3.039580 5.411600
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 1.20414 -3.448410 8.67336 10.20080 3.5956500 -2.87586 1.13300 5.674920
## 2 1.63656 -1.828880 6.22152 6.88458 0.9441300 -5.16572 3.27890 4.441010
## 3 -1.27430 -2.677440 4.36255 5.60435 1.0782800 -7.61513 1.98766 1.732170
## 4 -1.35891 -2.573890 4.74408 7.11259 -0.0799014 -2.26633 2.12922 2.664020
## 5 0.92488 0.106894 5.41177 7.92296 1.1685500 -1.75458 2.26918 0.815352
## 6 -3.99058 -6.468870 10.78720 11.60920 1.0326100 -8.80816 6.29450 6.784490
## PC31 PC32 PC33 PC34 PC35 PC36 PC37
## 1 -1.35344000 0.507352 -8.41585 0.829387 12.26600 11.85820 4.74150
## 2 -0.00149523 0.226129 -9.49899 1.133870 7.95940 2.20301 1.24861
## 3 -3.30131000 -0.940338 -5.27423 0.421175 4.99281 2.88495 5.57283
## 4 -0.17599900 1.379430 -5.63710 2.246720 3.13143 6.34839 4.92737
## 5 -0.13300000 0.541537 -2.29314 0.668216 6.68419 5.72174 6.01297
## 6 -5.08747000 -0.171277 -23.00240 -3.675780 16.02970 12.80410 16.76260
## PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 1 -9.306760 2.76689 -5.93442 -6.06762 6.25710 -3.59349000 -5.234970
## 2 0.942282 1.15206 -1.70704 -3.64219 4.40032 -4.88358000 -0.983047
## 3 -2.730500 1.48026 -2.75150 -3.23859 7.02737 -0.00871823 1.792620
## 4 -2.748640 4.86364 -3.10931 -3.63340 4.08623 -0.34372400 1.801030
## 5 -2.573450 -2.40112 -1.12456 -5.43754 4.60885 -2.15457000 -0.110867
## 6 -1.843590 3.79675 -16.48130 -4.03469 18.38850 3.29094000 12.381400
## PC45 PC46 PC47 PC48 PC49 PC50 PC51 PC52
## 1 4.988650 -4.982460 -1.85080 -1.710760 3.93953 -1.71044 -0.485108 -5.07246
## 2 3.155360 1.187990 1.96649 -2.717490 3.74892 -3.07740 3.763990 1.96752
## 3 4.232960 -0.399819 2.01636 -3.436120 -2.23064 4.40633 3.653800 -1.67868
## 4 4.671640 1.507510 2.25192 -1.914680 1.59632 3.10852 1.710710 -2.80248
## 5 0.368042 -3.879630 1.59489 0.537028 3.50036 -1.82312 3.326420 -1.81966
## 6 14.391200 9.710800 1.61274 -9.103410 2.49952 -5.82170 3.909670 -6.41516
## PC53 PC54 PC55 PC56 PC57 PC58 PC59
## 1 -3.498460 -0.591602 -3.4994500 -4.82154 -1.810640 -17.726900 -9.26768
## 2 -0.433642 -1.930470 -0.6457280 -2.88981 0.837444 -4.499700 -2.36933
## 3 1.006650 1.890690 -2.8024000 1.05884 1.743710 -3.537010 -3.22576
## 4 -1.712400 0.347678 -2.8923900 -1.44836 9.173920 -3.389600 -2.25018
## 5 1.983130 -1.238630 0.0951803 -2.85376 -0.779215 -0.808632 2.27708
## 6 -5.677470 5.034360 5.4366300 3.93041 3.065970 18.928700 7.49059
## PC60 PC61 PC62 PC63 PC64 PC65 PC66 PC67
## 1 -20.61790 12.84320 14.67310 -3.457630 -10.14490 -1.50620 0.232364 4.12503
## 2 -7.87076 5.24957 7.11435 -2.593260 -3.41792 1.14602 1.396230 -2.10571
## 3 -5.54479 3.61423 7.51875 4.003790 -1.75662 3.00994 5.822280 -4.10763
## 4 -8.13063 -0.47465 5.59817 -1.154140 -7.53385 1.28329 3.698950 -3.89698
## 5 -6.65000 7.30773 11.04180 -1.247250 -1.33184 -3.02103 -1.397850 -3.99500
## 6 35.27880 -8.97710 -21.74100 -0.392993 16.62620 -6.04496 -1.847560 4.15223
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 1.536300 -13.48370 3.477350 3.720790 -5.916960 -1.536060 21.41410
## 2 0.410133 4.71199 -3.481590 4.972220 -1.976500 1.742720 -4.75006
## 3 4.377580 1.44084 0.462429 -4.110600 3.893530 0.941228 -7.14890
## 4 -3.201360 -2.81169 1.883300 -4.296660 8.613790 -4.352930 -8.34767
## 5 -8.322840 3.09596 -4.858230 0.499918 -6.630700 -6.729030 -8.89989
## 6 5.775870 8.48122 3.804050 0.434860 -0.890947 3.822750 5.00241
## PC75 PC76 PC77 PC78 PC79 PC80 PC81 PC82
## 1 4.816550 13.458700 13.34870 12.188700 -4.819460 -11.57360 6.17078 13.476400
## 2 1.530660 -5.740000 -1.32580 -0.241753 -0.534579 1.42928 -6.28200 -4.914640
## 3 -0.446741 0.993584 -2.48911 -7.698270 5.382240 4.70027 -1.42030 0.187968
## 4 1.683970 -2.658500 -4.73852 -7.112860 2.012310 7.32083 -1.56853 -7.722160
## 5 0.992625 -3.933490 -2.05930 -0.953750 -1.942150 -4.07214 0.95736 -2.942890
## 6 0.370085 -2.142860 7.26191 1.907220 -6.636400 -1.64103 5.30904 5.049200
## PC83 PC84 PC85 PC86 PC87 PC88 PC89 PC90
## 1 7.76193 -7.810620 17.38790 -4.99597 7.74147 -1.481580 0.176349 -0.501481
## 2 -3.09758 -2.063730 -2.48145 -3.63315 1.76304 -7.050320 -1.247400 9.195970
## 3 -2.73932 0.486628 -8.22308 0.24716 -5.48897 0.330582 1.724920 -6.792810
## 4 1.13173 -5.309550 -5.73748 -4.96979 -6.28995 1.659020 -9.198980 -5.984870
## 5 -3.89876 -0.117790 -13.50760 4.70942 -4.93501 -2.630050 4.784760 5.307010
## 6 3.58915 -2.854030 3.82292 5.45977 -4.10345 4.272530 1.657460 -3.883730
## PC91 PC92 PC93 PC94 PC95 PC96 PC97
## 1 4.202590 4.965650 -7.4087200 -6.208640 1.590710 2.09937 -5.15330
## 2 -14.704200 -5.108240 7.1066500 1.663840 1.899690 1.74474 2.87462
## 3 4.442330 -0.533804 3.6846500 -0.154139 -12.909800 7.37524 -7.33184
## 4 0.503548 2.005400 6.5935300 4.694260 -2.200410 -5.88301 -1.10462
## 5 -11.029100 3.502840 11.1816000 4.877440 -0.128761 2.21194 15.18890
## 6 -0.361817 -3.177760 0.0502092 -0.616075 -0.968541 1.85617 -5.25075
## PC98 PC99 PC100 PC101 PC102 PC103 PC104
## 1 -2.76715 4.668910 -6.09971 -2.169200 6.270300 3.977710 -6.981530
## 2 -3.58197 -7.732730 8.38126 -0.937371 -4.556590 -3.814630 -1.967820
## 3 3.53662 -6.219700 4.27379 4.082890 1.894790 0.424252 -4.777210
## 4 3.08432 0.489263 2.99202 1.501650 0.424027 6.592460 8.391140
## 5 4.53260 9.150640 -3.82560 -6.886630 1.589150 -0.669231 3.295550
## 6 0.41695 2.985840 1.25128 -1.136980 1.542230 -0.606253 0.763501
## PC105 PC106 PC107 PC108 PC109 PC110 PC111
## 1 -2.15452000 4.523020 3.80938 1.531350 0.702757 4.135630 -0.273608
## 2 6.43562000 -2.728800 5.92438 -2.866840 -5.487690 -3.161320 -2.511560
## 3 5.57791000 -0.992583 -2.43924 -11.109900 4.475740 6.632170 -6.241980
## 4 3.08297000 4.307370 4.92973 -3.010530 -7.851450 4.580160 5.154650
## 5 0.00101321 2.321840 -11.48910 6.417860 5.643610 -0.598633 -5.881490
## 6 -1.09509000 -0.248610 1.51874 0.343609 1.231330 -2.490520 1.721730
## PC112 PC113 PC114 PC115 PC116 PC117 PC118
## 1 1.94491 -2.29686 -0.308073 -0.999831 4.083370 -0.293493 0.883767
## 2 -3.36833 -6.05028 6.710480 6.181630 3.554460 -5.974590 5.684740
## 3 -4.87052 -3.96529 -5.388560 6.287600 -1.984700 4.083740 -9.429340
## 4 -1.91764 -2.23213 -11.369400 -6.917750 -8.183640 2.695530 7.313880
## 5 6.28290 -2.58825 -6.088260 -0.679319 6.029180 4.263300 -12.417700
## 6 -2.00787 -1.40786 2.166730 -1.347610 -0.300582 -1.301520 1.308540
## PC119 PC120 PC121 PC122 PC123 PC124 PC125
## 1 4.221450 -1.564040 7.911250 -2.61741 -3.79594 7.53077 1.673740
## 2 -15.715200 1.550870 -6.573890 1.83892 10.59710 -11.08400 -3.380720
## 3 -12.139900 0.353241 -14.197300 1.51723 -8.93805 -9.65874 7.466230
## 4 -9.708120 7.559850 -6.511010 1.86800 1.61614 3.80643 -6.509970
## 5 1.590080 -14.323000 6.601470 -6.67113 12.43840 -1.01275 -1.038080
## 6 -0.409233 -3.669070 -0.784169 -1.82787 2.42521 1.32555 -0.236803
## PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 1 -0.791141 -3.44300 -3.594480 1.8163600 -2.23310 -3.75804 -3.43904
## 2 -3.964910 -3.31432 4.945450 -8.2090100 -6.61420 0.31984 -3.09043
## 3 9.292280 7.25150 6.932310 2.8002200 -3.07084 -6.12911 17.56150
## 4 -5.322430 3.51687 5.632890 -14.4926000 3.51536 -10.35130 -3.93269
## 5 -1.520330 20.49690 6.809590 -0.0800215 11.47930 -4.28857 1.40240
## 6 3.855290 -3.52590 0.411502 1.3495800 -1.18242 -1.39559 -1.16085
## PC133 PC134 PC135 PC136 PC137 PC138 PC139
## 1 1.2963300 -4.393090 0.8236190 -2.097590 -2.814670 -1.8934500 -0.74115
## 2 8.7048000 9.048130 10.3210000 -6.284710 6.913350 4.0879700 2.96463
## 3 -6.0236500 -3.814580 -10.3348000 4.016090 1.714060 1.6771600 5.07434
## 4 2.8089000 -0.546552 -0.5071200 8.565720 5.536310 -0.8850900 -3.03628
## 5 5.9091100 -2.025550 4.4254400 9.944990 -5.560400 0.0514701 5.69492
## 6 0.0403684 -1.547740 -0.0126975 -0.575174 -0.313609 1.3002400 1.27595
## PC140 PC141 PC142 PC143 PC144 PC145 PC146
## 1 0.297538 2.547830 -3.55896 -1.595990 1.91919 -0.110257 -0.402857
## 2 0.729823 -4.909300 4.44496 -1.743860 -5.16318 7.591420 14.467200
## 3 -1.128200 -8.056110 -11.82470 -2.224600 2.26824 -21.952000 7.497620
## 4 0.759772 0.230179 11.58190 -2.991780 -19.28690 8.705040 5.417540
## 5 -4.363940 16.202100 8.03622 -0.708072 4.29022 2.965250 -2.379100
## 6 -0.706910 -1.405860 -1.75542 -1.811070 -2.69975 2.217180 -0.493198
## PC147 PC148 PC149 PC150 PC151 PC152 PC153
## 1 -4.608490 3.688120 -0.43062 4.909980 -3.08522 0.601014 1.46085
## 2 0.877875 -6.242550 8.06753 -1.874390 2.12882 1.423480 -18.59620
## 3 -1.860260 -5.346860 -2.48336 1.330810 2.90265 -11.719100 -5.26154
## 4 -17.799200 -16.861000 3.81050 -0.053360 -4.62369 10.370900 -6.28892
## 5 8.841900 3.743100 3.11020 4.248280 6.69694 6.445800 -2.15495
## 6 -1.695370 0.791003 -1.15501 -0.141668 2.48447 -0.237788 -1.29166
## PC154 PC155 PC156 PC157 PC158 PC159 PC160
## 1 5.722670 -0.423216 4.201310 0.244095 -0.805949 -3.866640 5.424690
## 2 -0.625435 6.389990 14.936300 9.715080 13.279200 -8.555310 5.422110
## 3 -1.594170 10.364500 -1.123510 3.663520 11.720100 7.211250 -0.407956
## 4 2.757070 -8.743340 -6.234360 -4.000290 -2.407580 -1.000630 -4.656120
## 5 -25.056900 7.993380 -4.332630 -3.959700 -8.405300 0.794097 -7.704390
## 6 1.154230 0.163830 0.471986 -1.995310 -2.014020 -0.260112 -1.599450
## PC161 PC162 PC163 PC164 PC165 PC166 PC167
## 1 1.6498700 -1.359840 3.79428 -3.530000 -0.853487 -0.310305 -2.290050
## 2 -8.8495800 -11.654300 9.08494 -2.520500 -0.130562 5.301890 -0.270629
## 3 4.0321500 11.996100 7.98986 12.319600 2.272150 -9.494760 6.472930
## 4 0.0514306 2.547430 5.02832 4.478120 -6.070790 -2.540390 10.593900
## 5 -1.0143100 -15.175500 3.70779 0.459311 1.854200 5.176410 9.544460
## 6 -1.7903600 0.202701 1.25694 -2.376680 -2.890320 -1.451030 1.468570
## PC168 PC169 PC170 PC171 PC172 PC173 PC174
## 1 0.465481 2.676640 -1.601130 2.96868 -2.62084 -2.58270 -0.100016
## 2 -7.369730 1.328990 -5.188110 -15.25570 -5.76728 -16.53320 12.060300
## 3 15.124000 -9.525180 -1.132470 -5.85826 -2.58493 2.55936 9.871930
## 4 -2.576120 3.037520 21.085400 15.68240 -10.61770 13.02520 -9.889990
## 5 9.896820 4.577390 1.009540 10.29290 7.38586 -6.12128 7.591270
## 6 2.176990 0.565257 0.912449 2.95360 2.82096 -4.52211 -0.596217
## PC175 PC176 PC177 PC178 PC179 PC180 PC181
## 1 2.871010 -1.21489 0.508154 2.07517000 1.89598 -3.65197 -3.6299200
## 2 -0.476276 -1.65314 -4.676340 2.23210000 -7.27866 1.14088 -26.2318000
## 3 1.669020 4.47738 0.889116 -0.45733200 17.97750 1.53834 -6.5801600
## 4 -4.072420 -4.76307 -5.215450 7.32140000 2.34473 -5.22767 6.1979100
## 5 2.659320 4.42937 3.850640 -10.24460000 4.89775 9.28590 0.0956869
## 6 -0.773518 -1.25914 0.271291 0.00525602 -2.64764 -2.14407 -0.3952680
## PC182 PC183 PC184 PC185 PC186 PC187 PC188
## 1 1.24389 -3.302960 2.876120 -0.0440699 -3.30869 -1.603810 -0.268286
## 2 -18.35310 5.345250 -14.691200 -5.6640200 21.39570 6.865030 -14.165900
## 3 7.09082 -11.004800 13.481800 -20.5337000 2.79004 -4.536070 7.636240
## 4 9.04403 1.040480 13.061500 -4.2756500 2.94655 -1.229350 1.210330
## 5 -1.91249 -5.819030 -5.315890 1.6723300 4.47765 -8.252340 4.571220
## 6 1.94834 0.292433 0.871957 -0.8564560 -3.19218 0.221138 1.095620
## PC189 PC190 PC191 PC192 PC193 PC194 PC195
## 1 1.728610 0.634863 1.204040 -1.31276 -1.80258 1.567020 2.792490
## 2 0.945586 -1.555840 12.854200 -2.89874 -1.68527 0.112624 -13.116200
## 3 2.300320 -8.008130 6.444110 -8.98300 -13.88470 -12.383800 -0.113585
## 4 12.428800 -4.788980 -4.447500 15.73080 -5.70527 -5.846530 9.306030
## 5 -5.985770 1.879450 1.476470 -11.62790 -6.21661 -3.229850 7.531100
## 6 0.714980 0.681986 0.153076 -1.70962 -1.54565 1.120300 -1.223970
## PC196 PC197 PC198 PC199 PC200 PC201 PC202
## 1 -0.864158 -3.024560 3.592490 4.877320 1.03075 1.94791 -3.162600
## 2 7.083080 -10.919300 -0.240066 6.057200 9.91704 6.60590 -7.991950
## 3 -10.615000 -4.579790 0.957756 15.552400 1.77143 -4.19507 1.525850
## 4 23.423100 16.718100 -2.831820 -3.034520 6.46233 8.18282 -6.906330
## 5 1.602640 18.738600 4.732080 16.892400 -17.94380 7.59291 0.570049
## 6 -0.846690 0.846655 -1.244570 -0.327022 2.74285 -1.14568 -0.485728
## PC203 PC204 PC205 PC206 PC207 PC208 PC209
## 1 1.43958 -1.518410 1.95459 5.656330 -3.165260 2.09702 0.102735
## 2 3.68832 2.636990 -3.75071 -6.810350 13.593500 -7.44244 5.649990
## 3 -6.89536 12.060000 -8.39451 -7.701610 -10.363600 2.11047 -9.061770
## 4 2.90130 -13.414600 9.13907 -0.593586 -3.380060 -17.08950 -6.544610
## 5 -5.30091 8.201080 4.50963 21.209000 3.200770 -4.36198 10.915200
## 6 -4.64714 -0.358261 1.51279 2.381280 -0.843715 1.27899 0.294638
## PC210 PC211 PC212 PC213 PC214 PC215 PC216
## 1 -3.793070 3.757620 -1.913980 0.106880 4.64054 2.641460 1.057750
## 2 7.903730 -2.631820 0.925983 5.117520 -5.37771 2.993490 -4.576110
## 3 0.889264 0.122679 6.341800 -7.023340 5.47278 -15.066000 4.825350
## 4 1.718790 1.535190 -3.465960 -0.388398 -7.30977 5.921650 -0.304196
## 5 -2.290030 9.417030 3.900610 5.752560 -1.82579 0.352211 -3.288130
## 6 -0.507667 0.493733 -0.578154 3.186230 -2.51075 3.437370 0.422021
## PC217 PC218 PC219 PC220 PC221 PC222 PC223
## 1 -4.23964 5.261560 0.346457 1.696060 6.609610 -0.918140 -0.652525
## 2 -1.25051 -0.712649 -9.361090 1.373260 0.941486 -0.679007 -2.396680
## 3 1.89201 -2.000300 6.197500 -8.483430 3.722360 -4.141880 -5.493690
## 4 2.09185 2.883410 -8.287690 -4.182200 7.438670 0.968637 -7.885660
## 5 3.60629 2.308410 -6.502360 -1.966790 -2.588830 3.208160 7.530030
## 6 2.21642 1.201100 -0.757425 -0.203642 -2.354220 -0.330617 0.172852
## PC224 PC225 PC226 PC227 PC228 PC229 PC230
## 1 1.440030 0.375069 5.368990 -1.62975 -3.882370 2.55118 1.465770
## 2 1.158920 -1.649830 -3.525890 -1.56033 -0.939742 7.10125 -1.143480
## 3 -2.214400 -7.629850 -0.684586 -3.27709 4.970070 -3.72615 2.744160
## 4 1.612450 -1.170500 -4.701970 -2.02507 1.843920 -10.07330 0.651436
## 5 -2.161240 -2.060200 8.686100 5.51270 -7.836430 -3.78831 -0.415721
## 6 -0.940466 -1.301510 1.586080 1.95321 -0.276826 1.92731 -1.083310
## PC231 PC232 PC233 PC234 PC235 PC236 PC237
## 1 0.838979 -1.215150 -7.4422700 3.0388900 -2.2598200 5.87503 -6.326390
## 2 2.612370 -2.504560 -4.6264300 -0.3901230 -0.0653116 -1.64849 -1.762290
## 3 5.116110 -1.214120 -0.0121335 -2.9612000 2.7680300 -6.01301 2.921010
## 4 2.639330 -2.288750 -2.8794700 0.0593082 3.0394700 5.45513 -2.164020
## 5 -2.224360 3.966060 1.4207800 0.0534178 7.3592100 5.60633 -1.402640
## 6 -1.261690 -0.750792 -3.0197900 -0.8436020 -0.3856480 -1.27118 0.216776
## PC238 PC239 PC240 PC241 PC242 PC243 PC244
## 1 -2.972190 1.0442700 -0.253313 2.691210 1.332100 1.695790 -2.708640
## 2 -0.448896 1.0560500 4.273530 -1.223080 1.813660 -1.445580 0.299057
## 3 -2.602350 0.1641640 0.369107 1.247710 -2.398930 2.369080 -2.779110
## 4 -5.313580 2.2517800 -1.726960 -4.699740 0.200211 0.829226 -1.180550
## 5 1.251510 4.5528700 -1.537240 -4.584990 -0.205014 -1.850620 3.340830
## 6 2.936500 0.0154952 -1.353780 0.579329 -0.630658 1.092310 -3.475130
## PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 8.148420 -4.276080 6.894890 -3.070850 -0.685647 -5.314940 13.460900
## 2 2.541220 0.374377 -1.145630 -2.622020 -0.538264 0.246818 0.918522
## 3 -1.185390 0.927747 1.294350 0.220313 -1.187620 1.801910 1.660520
## 4 0.761870 -2.820680 -2.012240 -2.364380 3.156110 1.353790 -1.896550
## 5 0.379745 2.580200 2.436040 1.724550 2.283230 -2.067250 0.392675
## 6 1.345160 1.201260 -0.564827 1.243670 0.439854 1.456870 -2.140350
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 4.3786100 37.204600 -0.211792 -0.8296000 1.4709100 -13.7114000 10.404900
## 2 -0.2045570 -0.799595 -1.083990 0.0504868 0.3605240 -0.0786186 -1.371010
## 3 -0.0404249 3.268790 -1.071930 1.0412600 -0.8145450 -3.2581000 0.266288
## 4 -1.4496500 -0.126296 -1.365250 0.7934680 0.0579994 1.6465400 1.110700
## 5 -1.3385000 -1.459740 0.377419 -3.8941000 1.2156900 0.3805290 1.205490
## 6 0.9125970 -1.630110 -1.433200 -1.9005100 1.5761100 0.3080300 -0.951571
## PC259 PC260 PC261 PC262 PC263 PC264 PC265
## 1 8.246700 10.169900 12.3661000 10.551500 -6.5520300 -8.451820 -4.58806000
## 2 0.156113 0.078879 1.2198500 2.337660 -1.1434200 1.553940 -1.84909000
## 3 -3.401480 -0.537942 -2.6529100 0.552492 -0.5463700 -1.159910 0.68678400
## 4 0.848868 1.616660 -0.0403552 0.984601 -1.4430300 -1.720050 4.14152000
## 5 0.723288 -1.525150 0.1171330 -1.025430 0.0814161 1.786080 -0.00378826
## 6 1.005760 -1.625370 0.3543250 -0.717980 -0.2991210 0.335407 -0.70056100
## PC266 PC267 PC268 PC269 PC270 PC271 PC272
## 1 5.240320 0.84292000 -3.42696000 1.375530 0.2347510 -2.084360 0.8850150
## 2 -2.147090 0.00202997 0.21067500 1.608430 -0.9286760 -0.593916 0.6440190
## 3 -0.852395 -2.70650000 -2.68938000 1.477680 -0.1594480 -3.127140 0.2016290
## 4 -1.550920 2.73145000 -1.88184000 1.107690 0.0595403 -2.294110 -1.3184000
## 5 1.408520 -1.59335000 -0.00707087 0.581427 -0.0469112 -0.383952 0.0653132
## 6 0.791272 0.98820300 -1.80654000 -0.446946 -1.6573100 2.303220 1.4251300
## PC273 PC274 PC275 PC276 PC277 PC278 PC279
## 1 -7.066590 -3.831980 -2.041340 1.9109100 0.235371 -1.683040 0.593002
## 2 -1.887220 0.734744 0.478119 2.5843000 -0.726244 0.561962 -1.562550
## 3 0.972553 -0.362317 0.981879 1.6486200 2.052890 -0.744881 -0.248530
## 4 2.048610 -1.325460 -0.751355 -0.0172412 -0.121998 -0.577015 0.910234
## 5 1.610640 -3.079070 0.443442 -0.8247770 0.678580 -0.104648 -1.296460
## 6 -3.027620 0.441175 -5.661570 -1.9336500 0.394137 -1.752080 -8.592760
## PC280 PC281 PC282 PC283 PC284 PC285 PC286
## 1 3.294350 4.928870 0.22167 -1.839910 2.852200 -1.03176000 3.605580
## 2 -0.281637 0.499033 0.72375 0.540829 0.721245 -0.00990029 0.104565
## 3 0.812689 0.743964 -1.19117 -0.213697 -0.483257 -0.06532060 -0.756868
## 4 0.643373 1.707770 -1.24763 1.911780 0.774342 0.12405300 0.150799
## 5 1.410500 0.355993 0.48977 -0.415595 -1.320460 -0.58338300 0.178719
## 6 -3.012900 5.596500 4.84887 -6.007840 2.409990 2.69619000 9.282960
## PC287 PC288 PC289 PC290 PC291 PC292 PC293
## 1 0.750799 2.481190 -3.905700 -0.936461 0.5486000 2.6105100 -0.303720
## 2 0.214913 -1.469200 -0.289237 0.530904 0.9807980 -1.8608200 -0.260329
## 3 1.258940 -0.459047 0.236495 0.219122 0.0451856 -0.2709470 -0.144081
## 4 -0.602306 2.346140 0.246626 -0.666626 -1.0121300 -0.0633217 -0.455679
## 5 0.396745 0.575871 -0.809585 -0.373673 0.1655020 -0.5395460 0.786351
## 6 3.104320 -2.620390 -8.282950 32.762800 -26.0616000 5.9327800 5.437010
## PC294 PC295 PC296 PC297 PC298 PC299 PC300
## 1 -0.798181 0.478913 -1.2460100 0.796365 0.0845726 -1.4198000 0.172801
## 2 -0.493476 0.900055 -0.1344480 0.397938 1.0070100 0.9602260 -0.230228
## 3 -0.829300 -0.121166 1.1496600 -0.958856 0.8517270 -0.5905860 -0.110834
## 4 -0.840823 -0.674758 -0.0459401 0.461341 0.4481720 -0.0601338 0.250727
## 5 0.448526 -0.321390 -0.3594780 -0.674036 -0.0334604 0.7397040 0.590811
## 6 -5.968770 2.589400 -1.6682400 3.948190 -3.2865500 0.6164350 1.712460
## PC301 PC302 PC303 PC304 PC305 PC306 PC307
## 1 -0.02857240 0.784914 -0.111594 0.299842 0.1537890 0.257618 0.581506
## 2 0.00664791 -0.299704 -0.365714 0.161070 0.0510703 0.662804 1.036940
## 3 0.48116000 0.622439 -0.207147 0.375918 0.3409570 -0.491765 0.212827
## 4 -0.51127000 0.220587 0.334186 -0.752543 -0.0686399 -0.247551 0.710792
## 5 -0.08005430 0.672211 0.203599 0.177536 0.6002280 -0.438392 -0.123792
## 6 -1.55321000 -2.020450 -1.418340 -0.912601 0.8920440 0.499495 0.719343
## PC308 PC309 PC310 PC311 PC312 PC313 PC314
## 1 -0.912519 0.131409 0.297420 0.590261 -1.487690 -0.358880 0.11508000
## 2 -0.191389 -0.217103 0.644524 0.341326 -0.309193 -0.240516 0.12306700
## 3 0.656745 0.814825 -0.873942 0.326364 -0.447306 0.226318 0.10180300
## 4 -0.274965 0.646337 -0.385686 0.343867 0.224897 0.442280 -1.00193000
## 5 0.165050 -1.296820 0.112506 -0.108202 0.387461 -0.358981 -0.00209422
## 6 -1.782870 1.209840 -2.716690 -0.935250 0.423990 0.991128 -1.04000000
## PC315 PC316 PC317 PC318 PC319 PC320 PC321
## 1 -0.1361810 0.3707860 0.516543 -0.2106360 0.0629784 -0.2354070 0.0745866
## 2 0.2838960 0.0905737 0.497427 0.0790915 -0.1036950 0.0608285 0.0557892
## 3 0.2841800 0.4468840 0.175940 -0.2585500 -0.4107850 0.0991483 0.1526120
## 4 0.4180580 -0.3414670 0.120261 0.0687079 -0.3069000 0.1089410 0.2863580
## 5 0.1452500 -0.5249250 0.201887 0.2170580 0.4764040 -0.0473087 0.2351270
## 6 0.0775644 0.1263700 -0.248673 -0.3149170 0.7343540 0.4174750 0.6310480
## PC322 PC323 PC324 PC325 PC326 PC327
## 1 -0.26463200 -0.0522274 0.0492131 -0.4739130 -0.1011950 -0.02837090
## 2 -0.14297300 0.0105028 0.2550780 0.2915880 -0.1465930 -0.06746470
## 3 -0.27335000 0.1458950 0.6288990 0.1844770 0.0152865 0.10312400
## 4 -0.00865434 0.4698300 0.3435860 -0.1014130 0.1906390 0.00470319
## 5 0.19431100 0.0761079 0.0427247 0.0456039 0.3292730 0.01840060
## 6 -0.04306320 -0.1040430 -0.3736180 0.2089580 -0.2925900 -0.38677800
## PC328 PC329 PC330 Individual Pop_City Location Latitude
## 1 -0.06080890 -0.2694990 -1.75322e-06 255 Bengaluru India 12.9716
## 2 0.09760780 -0.2002560 -1.75322e-06 261 Bengaluru India 12.9716
## 3 -0.00548772 -0.0353852 -1.75322e-06 256 Bengaluru India 12.9716
## 4 0.02166250 0.0259966 -1.75322e-06 258 Bengaluru India 12.9716
## 5 -0.20685700 -0.0353230 -1.75322e-06 265 Bengaluru India 12.9716
## 6 -0.00352486 0.0335714 -1.75322e-06 262 Bengaluru India 12.9716
## Longitude Continent Year Region Subregion order order2 orderold
## 1 77.5946 Asia Unknown South Asia 60 52 52
## 2 77.5946 Asia Unknown South Asia 60 52 52
## 3 77.5946 Asia Unknown South Asia 60 52 52
## 4 77.5946 Asia Unknown South Asia 60 52 52
## 5 77.5946 Asia Unknown South Asia 60 52 52
## 6 77.5946 Asia Unknown South Asia 60 52 52
# # ____________________________________________________________________________
# # save the pca plot ####
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "PCA_lea_native_italy_all_temporal_pc1_pc2_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 and PC3
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22642
## column count: 362
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22642
## Character matrix gt cols: 362
## skip: 0
## nrows: 22642
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22642
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC
## 12 12 13 12 14 12 16 2 12 4 7 12 4 11 4 5 9 12 2 6
## KAG KAN KAT KLP KUN LAM MAT OKI PAL QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN
## 12 11 6 4 4 9 12 12 11 11 4 9 3 12 6 6 7 12 12 9
Convert format
##
## - number of detected individuals: 353
## - number of detected loci: 22642
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.geno"
##
## - number of detected individuals: 353
## - number of detected loci: 22642
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
##
##
## - number of detected individuals: 353
## - number of detected loci: 22642
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.lfmm"
PCA for SNP Set 3 for italy_all_US
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 353
## -L (number of loci) 22642
## -K (number of principal components) 353
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/
## pca result directory: dapc_italy_all_and_US.pca/
## input file: dapc_italy_all_and_US.lfmm
## eigenvalue file: dapc_italy_all_and_US.eigenvalues
## eigenvector file: dapc_italy_all_and_US.eigenvectors
## standard deviation file: dapc_italy_all_and_US.sdev
## projection file: dapc_italy_all_and_US.projections
## pcaProject file: dapc_italy_all_and_US.pcaProject
## number of individuals: 353
## number of loci: 22642
## number of principal components: 353
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 353
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.pca/dapc_italy_all_and_US.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
Check R symbols for plot
#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
scale_shape_manual(values=good.shapes[1:N]) +
geom_point()
separate out 1990s Italian pops into own region
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_all_US_temporal.csv"
))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_italy_all_US_temporal.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global","lea", "sampling_loc_italy_all_US_temporal.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.92910 Americas BER 2018
## 2 Palm Beach USA 26.70560 -80.03640 Americas PAL 2018
## 3 Brescia Brescia 45.53373 10.20445 Europe BRE 1995
## 4 Cesena Cesena 44.15287 12.24427 Europe CES 1995
## 5 Desenzano Desenzano 45.46289 10.54914 Europe DES 1995
## 6 Bologna Bologna 44.48478 11.36658 Europe ITB 2017
## Region Subregion order order2 orderold
## 1 United States 1 NA 75
## 2 United States 3 NA 77
## 3 Italy (1995) West Europe 20 12 12
## 4 Italy (1995) West Europe 24 16 16
## 5 Italy (1995) West Europe 21 13 13
## 6 Italy (modern) West Europe 23 15 15
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 40 Levels: BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ... YUN
## [1] 40
Check the regions
## [1] "United States" "Italy (1995)" "Italy (modern)" "East Asia"
## [5] "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BEN -37.2163 -6.90353 8.16388 1.015140 -2.97139 1.216970 2.13081
## 2 BEN -38.1887 -8.46262 9.77705 1.759510 0.60285 0.718323 3.56887
## 3 BEN -38.9790 -8.01976 8.76755 1.819310 -1.70063 0.935598 3.33746
## 4 BEN -38.0194 -8.22571 10.65540 0.928921 1.01601 1.707930 3.33731
## 5 BEN -37.4027 -7.80192 9.28229 3.267360 -2.53762 1.591770 4.66121
## 6 BEN -37.4843 -8.34769 10.70030 1.298730 -1.92420 0.541393 2.62710
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 0.5684750 2.02544 -7.37037 6.38881 0.520803 7.69074 -3.68301 1.65370
## 2 -0.4230660 2.12577 -8.71463 6.16739 0.829171 7.87555 -5.71411 4.18477
## 3 -0.8433900 3.22544 -7.93577 5.46541 0.356936 10.77630 -4.93038 3.19678
## 4 0.4307510 3.59423 -6.98231 6.08042 3.081570 9.18732 -4.72208 5.23138
## 5 0.9992170 2.46233 -6.41999 6.78189 -0.472345 9.88053 -3.36828 1.86085
## 6 -0.0327903 2.74647 -9.73923 4.67111 0.448524 9.52015 -6.28360 4.53345
## PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 1.057740 0.325190 -2.627180 -1.411280 -0.812558 0.822262 -1.825470
## 2 0.626415 -0.900056 -2.301140 0.190287 -2.100780 -0.682164 -0.916025
## 3 -1.313700 -1.368550 0.318453 0.770833 -0.186347 -1.338750 -1.464200
## 4 0.423200 0.223283 -1.895550 -0.951273 1.353340 0.160229 -3.740640
## 5 -1.240070 0.272152 -2.009790 -1.818460 -0.832699 -1.632900 0.949766
## 6 1.500170 -0.395267 -2.100190 -0.170254 0.542526 -2.972270 -4.421940
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 0.2692770 -0.313204 2.275700 -2.66022 1.861520 4.67830 5.329780 9.38405
## 2 0.7452640 0.349514 0.348397 -4.04510 -0.845730 8.25081 6.409090 9.06703
## 3 0.2266230 1.009580 3.223050 -1.67743 -0.471676 3.85200 0.658986 5.68706
## 4 0.3508680 -1.300530 0.366478 -2.86441 -1.131490 5.12790 0.581550 7.25504
## 5 0.0403922 0.597904 -0.475996 -3.05369 0.221962 5.79812 3.803570 6.04938
## 6 2.9205000 4.985500 5.500240 -1.63308 -0.399567 9.70931 7.013580 11.14980
## PC31 PC32 PC33 PC34 PC35 PC36 PC37
## 1 -0.590222 2.622320 -3.53950 1.114820 -2.001980 4.409940 0.397127
## 2 -0.468867 -0.410797 -3.82882 2.299420 -5.857600 3.508330 -0.162496
## 3 -2.213030 5.280780 -3.79915 1.054540 -2.789350 5.472000 -0.159691
## 4 -1.352950 0.219452 -1.26560 -0.458891 -0.435605 0.910645 1.595290
## 5 -2.483460 3.640380 -2.92439 -0.730481 -4.511460 4.320720 -1.012690
## 6 -0.934066 5.314380 -6.94339 -0.098423 -7.024860 11.169800 -6.095850
## PC38 PC39 PC40 PC41 PC42 PC43 PC44 PC45
## 1 0.395983 -9.57939 -7.43704 7.239610 2.504920 -0.759847 -5.22493 3.40859
## 2 -0.352294 -11.82960 -11.19050 9.416400 0.278659 -5.999260 -7.15720 7.44748
## 3 -1.235990 -7.42145 -3.76834 2.079530 -1.904390 -1.981260 -4.74069 4.14637
## 4 -0.166100 -7.98883 -3.14392 4.433250 0.852985 -3.483240 -7.57404 1.96215
## 5 -2.503830 -9.17584 -4.84570 0.468827 2.982720 2.120710 -5.19782 2.03000
## 6 -7.941000 -29.42410 -8.62099 8.761730 1.911680 -1.126970 -7.09627 17.12020
## PC46 PC47 PC48 PC49 PC50 PC51 PC52
## 1 2.090570 -5.77720 -1.189860 -2.29581 0.318810 -2.08005 -2.191190
## 2 2.152260 -9.49303 0.520355 -3.18648 -3.929510 -1.69894 -2.595630
## 3 3.511120 -4.69953 -3.713010 -4.54833 2.151300 -1.58466 3.035130
## 4 1.916040 -4.73867 -0.566523 1.22846 0.567066 -1.54684 0.428433
## 5 0.602696 -6.50677 -3.278270 -2.82465 -2.895220 2.26975 1.829660
## 6 11.857900 -10.71730 -10.966300 -16.49510 13.759100 5.21100 4.245830
## PC53 PC54 PC55 PC56 PC57 PC58 PC59
## 1 4.954960 -2.37780 0.896697 5.395320 -1.145590 3.564010 4.524750
## 2 0.727126 -1.18380 -3.342650 -0.150400 5.392310 1.196610 -3.677540
## 3 0.354250 -1.79022 -0.877577 4.954680 0.556605 -0.534725 1.182060
## 4 4.543530 -2.27671 1.616000 1.550080 3.946260 0.625859 -0.564126
## 5 1.885540 -2.71967 0.819113 0.963668 -0.251802 2.420420 -3.017930
## 6 3.951710 -3.01572 -3.935450 -4.748690 0.830369 6.884580 0.388583
## PC60 PC61 PC62 PC63 PC64 PC65 PC66
## 1 -0.8263280 1.81974 2.431430 -0.0643969 5.55498 1.467660 -1.481830
## 2 4.2603700 -0.50301 1.587340 9.3115500 24.78930 -5.398620 -2.891800
## 3 1.8544800 3.69886 -1.483870 1.6366100 5.74501 -4.580270 -1.837320
## 4 -1.5422500 -1.47563 -0.427616 0.0958617 2.36394 1.228640 0.898035
## 5 -0.0808026 -3.34171 -0.117048 4.0755800 6.48018 -0.246435 -2.077610
## 6 6.6604400 3.12734 0.171965 -9.6483300 -31.62390 8.028320 9.381630
## PC67 PC68 PC69 PC70 PC71 PC72 PC73
## 1 -4.164680 5.61138 3.83808 -2.128040 -0.238589 -0.193283 2.562090
## 2 -11.437800 15.92480 10.77720 5.812610 -9.094440 -3.195210 4.952080
## 3 -0.726276 4.80709 6.07589 6.022580 3.459730 4.732810 -1.252730
## 4 -9.353560 11.07980 6.58152 -0.119536 1.329850 0.602433 0.284996
## 5 -7.027850 7.80645 4.26135 0.089026 -2.194810 2.416980 -2.026810
## 6 21.644600 -13.80390 -15.37180 -13.442600 9.503160 -7.263210 1.913940
## PC74 PC75 PC76 PC77 PC78 PC79 PC80
## 1 0.958617 2.73364 0.451171 -1.822620 -0.182561 5.19936 0.509525
## 2 -3.655770 4.69057 -7.160320 0.509124 0.973660 1.24334 -18.565300
## 3 -0.527297 -5.21244 -0.237189 -0.149916 -5.155280 1.92483 7.837730
## 4 -6.093060 6.15596 6.629350 -5.568830 4.620180 3.79086 8.497740
## 5 -2.007370 -1.22402 2.184120 -0.407072 4.365160 -5.47635 3.338510
## 6 4.064630 -11.47350 4.288710 4.533900 -0.786008 -1.17659 -4.751030
## PC81 PC82 PC83 PC84 PC85 PC86 PC87
## 1 6.00692 4.448750 -0.177065 -6.95261 -2.195040 -6.588350 -0.603661
## 2 -5.45099 -6.672300 -25.049500 5.37501 -3.476640 12.910200 9.164480
## 3 5.36210 -2.170700 1.175030 -1.73433 1.102810 -7.643620 -2.121850
## 4 2.81163 3.213330 3.649690 -3.38869 -1.975100 0.826779 -0.434818
## 5 -1.17468 2.968970 4.821630 -5.96955 -0.746094 -2.057260 -3.501540
## 6 -4.26167 0.785808 -0.277082 1.04392 7.450440 11.629900 1.364960
## PC88 PC89 PC90 PC91 PC92 PC93 PC94
## 1 5.414430 0.013979 -7.48452 0.604923 -6.464180 -0.227803 9.92392
## 2 -3.135480 10.394300 12.25910 2.177440 -17.264200 10.370700 2.87782
## 3 5.473760 0.921444 -3.14565 -2.906390 10.198800 -1.234940 -3.32567
## 4 4.239260 -2.228360 -3.70182 -4.045790 4.185270 -11.283600 -3.71541
## 5 -0.630829 -6.814160 2.20122 -0.038764 -4.362270 -7.512580 -1.73986
## 6 -3.797450 3.386640 5.68336 -1.140120 0.343172 3.261860 2.55321
## PC95 PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 9.26586 -5.553240 6.405000 7.71460 1.578660 3.86968 -4.16344 -4.629450
## 2 -2.91785 -3.076880 1.792370 -8.33705 -0.705961 -3.03898 -3.44964 -1.440070
## 3 1.46641 4.765990 -0.495241 -3.93044 4.082810 -1.04107 1.32525 -1.015940
## 4 4.88287 1.909630 -13.020100 12.89230 4.879510 4.60853 -1.34667 1.444590
## 5 -4.95794 0.840153 -2.639320 16.81240 -0.852836 2.29255 -2.58181 -4.006070
## 6 5.63180 -1.502850 1.442360 -1.49457 1.237340 1.95710 7.42489 0.815122
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 9.847060 4.8570900 -1.49122 -3.16346 8.35051 -7.86179 -4.939470 -14.34130
## 2 -5.080550 -1.6806900 3.14699 3.41234 -5.48929 5.30144 4.239760 4.70956
## 3 -5.939040 -13.0688000 -4.61475 5.21346 5.81333 -6.32581 -5.191790 3.16850
## 4 10.532900 -2.8967000 4.32134 -9.25436 -7.55576 2.95399 0.543659 8.26517
## 5 2.880730 2.7698900 2.40714 2.61896 6.88885 -6.74691 -1.607330 -2.02634
## 6 -0.813424 -0.0401658 -3.34307 2.14064 0.32628 1.01315 2.053410 1.38524
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 2.813680 -1.215830 -4.616600 -12.32540 1.4200400 -3.335950 -2.194070
## 2 -3.656750 2.254870 -4.340980 1.05799 0.9666860 5.049780 -6.964390
## 3 1.780190 12.550200 8.518010 -6.79152 -6.4443700 -0.924222 -2.078180
## 4 3.564250 -7.616980 2.356720 1.48668 1.5370100 12.315200 1.249730
## 5 -2.931870 1.525940 -0.646879 -10.68990 -0.9625750 -8.422430 7.790970
## 6 -0.892481 -0.754794 0.484680 1.77637 -0.0906339 -2.070330 -0.843481
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## 1 -1.517170 11.09280 -1.07037 5.999500 -12.01400 -2.401110 3.324020
## 2 1.095350 -2.07567 -2.52639 0.986412 1.93845 2.775770 -0.389459
## 3 -10.150900 -7.12411 3.62448 -4.445940 4.95399 -3.153290 9.177360
## 4 5.372170 3.15508 0.14191 -5.484870 -2.75911 -0.381302 11.563700
## 5 -1.834240 -2.59692 2.84946 -5.990300 3.67164 -7.873220 -5.824640
## 6 0.200012 2.03898 2.95781 -1.131490 1.93482 -2.615500 -2.845660
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## 1 5.819390 -0.184176 -9.95533 -7.791950 0.222528 -7.367430 -3.82414
## 2 -4.275520 -0.156791 4.53326 2.181980 -3.652290 -6.869180 5.46631
## 3 -0.651213 -1.687460 -11.28530 -3.282510 11.855900 12.340000 -2.63842
## 4 0.448775 -4.826850 3.99225 0.146227 4.174790 -3.065300 17.56640
## 5 -2.044640 -8.998780 -11.52720 -2.180850 -5.849920 2.257200 -3.70467
## 6 1.043160 -0.170034 1.60509 -1.560590 -1.995800 -0.411025 2.51800
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## 1 -1.517100 2.02710 13.197500 6.910530 9.58284 1.504540 4.62906
## 2 0.458472 5.06087 -3.139790 -1.852510 -3.16872 0.101584 -0.57667
## 3 10.638600 -1.48781 0.696223 0.476868 7.76636 6.271820 -6.82674
## 4 -7.274210 3.05790 7.581370 -2.686990 14.64790 -10.996300 2.58538
## 5 -0.836249 -13.62470 5.703340 3.110320 5.70559 -2.291810 6.24592
## 6 0.320501 2.33693 2.097010 2.409500 -2.71610 -1.073870 1.74500
## PC139 PC140 PC141 PC142 PC143 PC144 PC145
## 1 -4.872290 -1.291600 -1.870690 4.475900 -2.00113 10.11170 1.32238
## 2 -0.874231 -0.611146 7.191280 0.649931 2.33304 6.18937 -1.19040
## 3 1.443740 11.043400 -0.293550 -6.267920 16.97040 -8.07867 -6.61166
## 4 -5.814640 -2.796800 -21.454500 -2.731940 4.55432 2.23595 -0.85853
## 5 11.926800 3.954420 -7.137120 1.038880 -13.61840 2.54991 7.13757
## 6 -1.090150 0.526904 0.864949 -0.346388 1.39443 0.45065 1.35301
## PC146 PC147 PC148 PC149 PC150 PC151 PC152
## 1 -1.836910 -2.6950000 0.9994530 -3.72830 -12.221500 5.772740 -1.73444
## 2 -1.120740 -4.1102800 0.0942148 -1.55061 3.695220 0.553763 -1.71085
## 3 7.738200 -0.0127532 -0.4983650 2.25453 0.294455 11.570100 -4.45945
## 4 5.118430 1.7560400 1.0225900 6.86183 6.374410 -13.516400 5.90705
## 5 -4.475320 -0.8183350 7.3425100 -4.87818 -5.551660 3.412620 1.08903
## 6 0.369585 -1.2836200 0.9649440 -3.31766 0.602201 1.625320 1.74354
## PC153 PC154 PC155 PC156 PC157 PC158 PC159
## 1 -17.90740 -5.202690 5.948770 4.352760 -7.03850 3.721990 -2.53728
## 2 4.57882 -2.035130 1.097780 -1.403490 -2.30925 -0.547093 -5.01209
## 3 -2.70191 -0.463896 -5.194520 11.143000 -16.59010 6.087970 8.52568
## 4 -5.39607 -1.034730 13.884800 -1.188220 1.31077 9.279310 6.27999
## 5 1.52049 10.034000 -0.660857 -9.585580 -3.52619 -8.759880 10.72900
## 6 3.28944 1.946040 -1.082770 -0.161758 2.02381 1.520940 -1.02833
## PC160 PC161 PC162 PC163 PC164 PC165 PC166
## 1 9.286490 -9.80941 0.0634464 3.571930 -6.132830 4.235850 3.480150
## 2 1.236520 -2.21464 -7.0336700 -1.324130 1.166360 2.326180 0.219571
## 3 0.248725 3.63218 2.1174100 9.625760 -1.044560 -10.892000 3.389200
## 4 -2.620920 4.13362 7.7879300 5.602260 7.883490 7.553580 -3.489980
## 5 8.114180 -6.67751 -2.6727900 10.302600 8.936990 3.837210 9.753110
## 6 1.227530 1.03112 2.1029300 0.466882 -0.194435 -0.195545 -2.296970
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## 1 -13.885100 2.28453 15.83310 -5.623020 -5.10345 11.89280 -3.83828
## 2 -1.301960 1.23902 -2.80288 1.054320 -1.04918 -1.81956 -3.33645
## 3 7.104330 -1.00865 -1.28397 -4.899100 -10.42260 5.35418 -6.82687
## 4 12.771300 -1.97726 9.16515 -5.071190 16.88840 -6.48527 10.52040
## 5 4.548820 2.43470 -16.38980 -19.603700 -4.55623 -5.94214 -6.49890
## 6 0.225724 -2.11463 -1.58292 0.135736 -1.70491 -1.45946 -1.01384
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## 1 4.118520 8.078020 5.5310600 6.064530 6.878710 5.79570 -5.64530
## 2 -3.883720 0.636334 0.7006900 -0.405249 -2.239920 -1.10821 -1.59937
## 3 5.880920 9.774880 14.9522000 1.395240 7.712630 -4.14718 3.70241
## 4 -4.617630 -10.420200 9.6121900 12.939200 0.630583 -6.50927 5.83060
## 5 -0.427483 -5.076660 -1.6409300 2.821980 -7.651990 9.41352 -10.88510
## 6 -2.940440 -1.401290 0.0991526 2.884440 1.868630 -2.89713 2.92469
## PC181 PC182 PC183 PC184 PC185 PC186 PC187
## 1 -4.232930 -10.011700 -6.900140 -7.28992 -8.52278 -9.221240 -15.849700
## 2 0.367756 -0.723134 2.786030 2.18826 5.27580 1.031870 0.758629
## 3 -12.397500 3.606680 -1.200270 4.70236 -2.17793 -1.670930 15.494500
## 4 8.520050 -2.173560 0.606372 7.70590 4.36108 -0.644644 7.131180
## 5 -6.071170 -7.321510 -17.684600 4.75439 14.73380 -4.374830 3.541290
## 6 1.631840 -1.592960 -0.082407 2.27033 3.60718 -0.237748 -1.583130
## PC188 PC189 PC190 PC191 PC192 PC193 PC194
## 1 1.858910 -14.540400 7.463540 -3.40255 -7.90779 -5.510220 2.836420
## 2 0.842773 1.966160 -0.766134 6.77392 -1.67406 -0.471702 3.168310
## 3 -5.628550 -3.069020 -6.544690 1.29123 -18.02510 -6.845210 18.699600
## 4 2.896110 0.170743 -2.430260 -12.04440 -9.58860 3.117910 -3.227580
## 5 -5.221950 2.308260 23.749200 1.19344 -6.49064 24.275400 -6.881190
## 6 -1.254430 -2.052290 0.536871 2.47689 3.41933 -1.140760 0.470326
## PC195 PC196 PC197 PC198 PC199 PC200 PC201
## 1 -5.35120 1.8103600 6.842580 6.201950 6.92850 -5.49036 7.270970
## 2 3.00008 0.5952670 1.789050 -0.458206 1.15611 -3.66120 -1.524690
## 3 5.67847 -18.1178000 2.453550 1.633080 -3.38287 6.39387 -11.282700
## 4 7.48080 0.1467490 8.882330 -2.151750 1.86752 10.29380 -3.301030
## 5 -15.42870 -0.0696018 -10.162800 -15.774900 12.47940 1.10893 -12.733700
## 6 -0.18008 0.7341080 0.862116 1.880360 -1.91350 -1.12129 0.238685
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## 1 -12.462000 -4.778000 -18.035900 -0.532531 -9.1278400 9.715630 -0.490794
## 2 -1.121240 3.152790 4.485210 -1.588600 2.6936100 -0.265028 3.652950
## 3 6.566590 3.958870 0.279617 7.692290 3.5672300 -6.330670 -0.627159
## 4 3.758650 -2.192670 6.208360 2.795050 -2.1705800 15.685800 12.157100
## 5 12.713200 2.427950 4.088340 7.799830 2.9429500 -8.044550 -4.305950
## 6 0.468914 -0.850908 1.368320 1.401280 -0.0735814 -0.125449 1.217980
## PC209 PC210 PC211 PC212 PC213 PC214 PC215 PC216
## 1 -3.513740 3.54397 0.387571 -8.82160 -5.607230 -7.51641 9.35403 5.879680
## 2 -0.547561 2.43223 -0.115685 2.52513 3.542480 1.53285 -3.91949 -2.502880
## 3 13.251000 8.07358 -12.135400 10.15140 6.645870 5.15132 22.14520 -0.149686
## 4 14.197100 7.63045 13.660500 -7.23077 14.946300 3.46075 1.00408 -8.565910
## 5 -3.101940 4.10934 -6.322110 1.98161 -5.087440 3.28440 -12.12970 -0.572817
## 6 0.898895 -3.03037 -0.278060 2.67619 -0.484659 1.74179 2.35686 -1.467420
## PC217 PC218 PC219 PC220 PC221 PC222 PC223
## 1 19.26480 -3.137400 11.071400 12.870400 0.6398130 15.06190 -7.59028
## 2 5.22980 0.542928 -0.760684 -1.837190 -3.5223500 -3.32979 -2.54436
## 3 -4.02030 -4.666050 5.202530 -3.887920 -3.9502300 -3.45665 -4.47217
## 4 8.56317 6.649540 3.872890 -0.284108 8.4878100 -13.31170 7.53663
## 5 -11.88450 -8.144070 -0.525392 8.363680 7.0502000 -0.24391 7.37316
## 6 1.61979 1.787520 -3.385790 0.427645 -0.0499479 -0.25317 -1.00382
## PC224 PC225 PC226 PC227 PC228 PC229 PC230
## 1 -8.084790 -11.51260 -3.937810 5.87411 -5.462010 2.14933 1.695830
## 2 -5.009810 -4.07868 2.953850 2.36136 -3.607180 2.04615 -0.831913
## 3 0.973225 -3.51610 -9.974220 -15.96100 -2.225650 -5.37068 -3.130120
## 4 -13.264000 8.38465 -4.163090 7.01938 -0.106498 1.82626 5.171280
## 5 3.195760 7.03553 4.082500 4.38256 3.189240 -3.53044 4.546750
## 6 -0.613000 4.03901 -0.884451 3.22195 2.753130 1.76970 0.466559
## PC231 PC232 PC233 PC234 PC235 PC236 PC237
## 1 1.553860 4.782800 -2.749260 -1.755180 2.863740 3.936750 6.0190400
## 2 -0.554079 -2.683960 -2.205200 2.327890 -0.794698 -2.039530 -4.7288600
## 3 -1.777080 -7.009510 3.806610 6.175080 -0.279089 -2.644020 -7.5750100
## 4 1.190240 3.605350 4.879270 -0.953465 -5.975150 8.113830 -3.1210700
## 5 -3.606990 5.225810 2.335190 -1.474530 0.259147 -0.596752 -4.3258300
## 6 0.567009 -0.536693 0.116808 -1.449060 -1.321010 -0.473417 -0.0846443
## PC238 PC239 PC240 PC241 PC242 PC243 PC244 PC245
## 1 -1.404510 5.60743 8.38844 -10.578200 -3.051890 5.500160 2.06766 1.14805
## 2 5.396560 -3.71861 -1.26536 2.768260 -4.888940 -3.971290 -2.00073 -1.30643
## 3 -4.153380 -1.31689 5.67011 -1.000720 7.487610 -1.052430 1.62645 -3.22069
## 4 1.788340 -8.19962 -3.20668 2.451300 -6.313990 -0.160164 5.35240 2.39894
## 5 0.927021 6.15589 -1.15607 2.413450 -1.357190 -5.044420 -4.46584 1.77346
## 6 -1.182870 -1.12245 -2.62583 -0.753323 -0.798388 -0.989399 -1.66093 2.58336
## PC246 PC247 PC248 PC249 PC250 PC251 PC252
## 1 3.767430 -2.647600 3.49967 -4.310880 5.14785 -4.961910 0.8333450
## 2 -2.770960 -2.242400 7.05074 -4.556240 4.37798 2.057740 -2.7946600
## 3 -5.211310 -1.750010 -3.45672 4.118110 -3.59872 0.185661 0.1621490
## 4 2.353740 -0.527736 -2.51637 3.154160 3.15777 4.508400 -1.3894500
## 5 -4.113550 -2.481920 2.19272 -0.951083 -2.42434 -0.968350 -0.0275769
## 6 -0.427994 -1.246580 1.75902 -0.277301 -2.47773 -1.489000 -0.6051430
## PC253 PC254 PC255 PC256 PC257 PC258 PC259
## 1 1.854340 3.13650 -0.664829 0.693134 3.361710 1.045370 2.326450
## 2 -0.637841 1.42913 2.944340 4.867670 0.522718 -0.724822 9.524000
## 3 -3.735260 1.58096 -1.438430 -0.637720 3.342480 -1.603180 -1.239580
## 4 7.226070 -2.70647 -3.467850 0.287083 -2.452230 3.241980 -0.614516
## 5 -2.384700 -1.79120 -1.706680 1.994750 -3.487750 -0.333868 0.502889
## 6 1.605310 2.65217 -1.368280 0.835537 1.831050 -1.047120 -1.092050
## PC260 PC261 PC262 PC263 PC264 PC265 PC266
## 1 -2.777920 -1.95322000 -2.07877 3.195600 1.345990 0.275427 -2.559010
## 2 7.989790 1.62617000 -7.60646 0.835935 -9.781800 0.590346 15.608200
## 3 3.265430 0.44738300 0.48502 1.386520 0.846490 1.482330 1.789920
## 4 -3.071270 1.70066000 1.04902 1.217850 -2.983800 -1.021330 1.205720
## 5 0.902227 0.00190388 -2.03280 -1.769550 -0.307306 0.582840 -0.509289
## 6 1.204670 -2.69179000 1.10412 0.537858 1.179470 1.118340 -0.719916
## PC267 PC268 PC269 PC270 PC271 PC272 PC273
## 1 -2.143000 0.863042 2.778440 0.136910 -1.467990 -0.711070 -0.601976
## 2 -20.080400 27.223100 -6.868710 4.004540 -12.531500 4.774630 4.627560
## 3 -3.104600 2.450220 0.212684 0.821071 -2.337130 -0.119570 1.141260
## 4 0.916439 -3.144310 -0.736625 -1.191730 -2.186240 -0.803661 -1.491480
## 5 -0.646843 -0.637993 1.081910 -0.639125 0.359545 -0.338795 1.158290
## 6 2.016850 -0.178199 0.759125 -2.812540 -0.594396 -1.094580 -0.428685
## PC274 PC275 PC276 PC277 PC278 PC279 PC280
## 1 1.4780100 0.823670 -1.714060 -0.67909 -1.5034000 4.8798000 1.340700
## 2 0.3553940 -13.032400 -14.587000 4.76811 -6.6642900 10.7046000 -6.946140
## 3 -2.3262700 0.184944 2.053030 -3.68246 -0.0973468 -0.0198046 -0.174498
## 4 0.7328620 2.012800 -0.843853 2.44489 -1.3144300 -0.4585440 0.346058
## 5 -0.2701790 -1.378740 0.557797 1.05570 -0.7874850 -1.6605900 -0.342479
## 6 -0.0309142 0.667012 0.401887 2.09692 0.1748000 0.0920300 0.182606
## PC281 PC282 PC283 PC284 PC285 PC286 PC287
## 1 0.73774800 -0.219410 -0.5817050 -0.914899 0.552053 -0.749923 -0.0673759
## 2 -6.68965000 -5.031680 5.1597100 -2.424530 -2.720410 -2.685180 -0.4999390
## 3 -0.00611086 0.374657 -2.0810500 -2.684240 -2.272230 -2.613870 -1.1641700
## 4 0.97405400 1.765050 0.2351030 -0.813811 -0.250675 -0.863614 -0.1390070
## 5 -3.11698000 -0.793641 -1.4055100 1.493910 -0.186053 -1.002550 0.1591750
## 6 -0.78684400 -1.379630 0.0313976 -1.351390 -1.358130 -0.467317 1.1953000
## PC288 PC289 PC290 PC291 PC292 PC293 PC294
## 1 0.0877016 0.0127312 -0.525059 -1.15551 0.971415 -0.0652187 2.0796200
## 2 0.7871490 3.4397300 -0.929990 -5.32491 -4.162390 -2.0194400 1.5909600
## 3 0.8057580 -1.3788600 0.852070 -1.61029 1.228690 0.5189210 1.0591500
## 4 -0.2088700 -1.8083600 1.809490 -1.00467 -1.324430 0.3877700 -0.0342288
## 5 -0.4940000 1.3250200 -2.141980 -1.87450 0.797641 0.9049020 1.4598000
## 6 -1.8641400 2.1354400 -1.734800 2.29779 -7.476230 -3.6850200 -0.9018080
## PC295 PC296 PC297 PC298 PC299 PC300 PC301
## 1 1.071730 1.3868200 0.0542372 -0.4815720 0.0264312 -1.983140 0.274870
## 2 -1.104370 -2.7377700 -1.1016000 -4.1093200 -2.5193700 -0.312920 -1.207590
## 3 -2.240230 0.3100860 -0.3687790 -1.0236800 -0.2783530 -1.164010 -0.369251
## 4 -0.101127 0.7541810 0.0470820 -1.6237800 0.8645190 0.181799 -0.813301
## 5 -0.410506 0.0671651 -1.9652200 -0.0239154 0.1945640 1.038650 -0.250763
## 6 -0.275706 -2.2459900 -7.3447300 2.1935900 8.0920400 1.262790 -5.090690
## PC302 PC303 PC304 PC305 PC306 PC307 PC308
## 1 -1.458250 0.709720 -0.0421059 0.407448 0.7782790 -0.7052840 2.2297900
## 2 0.725306 1.298510 -1.8290600 -4.483180 1.4911100 -2.0684600 2.0522900
## 3 0.122073 -0.604087 0.5332070 1.050690 0.7760090 0.0206425 0.0605125
## 4 0.476817 -1.173360 -0.4931780 0.412802 0.0402457 -0.8641810 0.7106590
## 5 -0.781629 0.257777 -0.0882721 0.395462 0.3311180 -0.8031150 -0.8485220
## 6 -1.147150 -2.120710 -9.1826800 -10.265700 5.8447000 -13.4441000 0.0841197
## PC309 PC310 PC311 PC312 PC313 PC314 PC315
## 1 -0.267768 -0.8472460 -0.314386 -0.986070 0.553302 0.101887 0.292182
## 2 -1.573330 0.5858130 1.393560 1.970490 -0.465429 2.210850 -0.145276
## 3 0.300007 -0.0108822 -0.100460 -0.327450 1.493130 -0.710889 -0.210780
## 4 -0.689473 0.5151830 -0.599547 -0.048379 -0.942814 0.947402 0.224050
## 5 0.270386 -1.5381500 -1.361500 -0.536435 0.384209 0.242467 -0.828820
## 6 36.385600 13.4505000 8.969700 -3.973680 3.632820 2.349070 -1.537220
## PC316 PC317 PC318 PC319 PC320 PC321 PC322
## 1 -0.0578941 0.925388 0.612744 -0.511085 0.74431400 0.2642700 0.573491
## 2 1.2308400 0.763512 0.197456 1.483330 -0.02875870 -0.0537602 -0.802819
## 3 -0.7862020 -0.946648 -1.213510 0.452176 0.02074090 0.4314920 -0.409122
## 4 -0.1229940 -0.436630 0.167505 -0.648077 0.76464900 -0.0972988 -0.766162
## 5 0.3399550 0.436367 -1.076140 -1.207180 -0.00981003 0.0539852 0.526952
## 6 2.0425900 3.318330 3.219610 -0.500412 1.61449000 -1.7660700 2.150020
## PC323 PC324 PC325 PC326 PC327 PC328 PC329
## 1 -0.996313 0.2486230 -0.573440 0.3714150 0.234298 0.6489650 0.303600
## 2 -0.493265 -0.0437834 -0.176258 0.3449410 0.481877 -0.7397780 -0.228351
## 3 -0.223398 -0.6961550 -0.299019 -0.6208470 0.113228 0.5344440 0.968375
## 4 -0.130816 -0.2181390 -0.514174 -0.0958358 -0.546446 0.1743940 -1.256220
## 5 -0.150911 -0.0108711 -0.221861 0.4702230 1.031290 -0.0843454 -0.547098
## 6 -1.039850 0.8745920 -0.942692 0.3012320 0.716169 -1.5554400 1.768970
## PC330 PC331 PC332 PC333 PC334 PC335 PC336
## 1 -0.118850 0.2704280 -0.184674 -0.0537026 0.268162 0.532530 -0.6034540
## 2 0.797714 -0.1556930 -0.525773 1.2804600 -0.354151 0.941927 -0.0285140
## 3 -0.538072 0.3225270 -0.345629 0.2606370 0.196721 0.426817 -0.0717353
## 4 -0.215704 0.0500965 0.153409 -0.3820600 -0.259605 0.134653 -0.0705485
## 5 0.566725 -0.1695040 -0.310048 0.2401800 -0.253752 0.357621 -0.0620567
## 6 -0.699025 1.7625100 0.972912 -0.7788220 0.965009 1.320090 1.2295900
## PC337 PC338 PC339 PC340 PC341 PC342 PC343
## 1 -0.6487000 -0.3366380 -0.1954530 0.150603 -0.0493815 -0.3354550 0.1317770
## 2 0.1047380 -0.3295320 -0.3829760 -0.272557 0.1843130 -0.0238051 -0.1792370
## 3 0.4291140 -0.4008970 -0.0405974 -0.346916 -0.0321387 0.3521720 0.1974460
## 4 0.0210873 0.6379810 0.3861810 -0.355886 -0.2158510 -0.5362330 -0.5423710
## 5 0.4152080 -0.0417838 -0.2362560 -0.242030 -0.2350190 0.0032567 0.0690464
## 6 0.3178510 -0.0229162 0.7649860 -0.148682 0.3228920 -0.6665610 0.0555563
## PC344 PC345 PC346 PC347 PC348 PC349 PC350
## 1 0.1981880 -0.3480010 0.1173250 -0.0416354 0.0267634 -0.3073740 -0.2022120
## 2 -0.0380013 -0.1843660 -0.1410460 0.0572115 0.4692510 -0.1033020 -0.0375376
## 3 -0.0204594 -0.2057300 -0.1673220 -0.6778110 0.0527420 0.0355308 0.0356868
## 4 -0.2253400 0.0426583 0.1013450 -0.2063460 0.2040370 0.2990810 -0.0189545
## 5 0.0388593 -0.0861736 -0.0923959 -0.3225470 -0.2266730 -0.1478300 -0.0452171
## 6 -0.4401270 -0.1043480 0.0843801 0.2537730 -0.2020580 -0.2614700 -0.3745610
## PC351 PC352 PC353 Individual Pop_City Location Latitude
## 1 0.00836228 -0.0332632 2.42165e-07 266 Bengaluru India 12.9716
## 2 -0.07021830 0.2772360 2.42165e-07 255 Bengaluru India 12.9716
## 3 -0.02586280 0.0257898 2.42165e-07 256 Bengaluru India 12.9716
## 4 -0.21985700 0.0395047 2.42165e-07 265 Bengaluru India 12.9716
## 5 0.08758560 0.1838020 2.42165e-07 261 Bengaluru India 12.9716
## 6 -0.01765120 -0.0508445 2.42165e-07 262 Bengaluru India 12.9716
## Longitude Continent Year Region Subregion order order2 orderold
## 1 77.5946 Asia Unknown South Asia 60 52 52
## 2 77.5946 Asia Unknown South Asia 60 52 52
## 3 77.5946 Asia Unknown South Asia 60 52 52
## 4 77.5946 Asia Unknown South Asia 60 52 52
## 5 77.5946 Asia Unknown South Asia 60 52 52
## 6 77.5946 Asia Unknown South Asia 60 52 52
genotype <- here(
"euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 323
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 323
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ INW JAF KAC KAG KAN
## 10 12 12 12 12 12 12 2 11 10 12 4 7 12 11 4 2 6 12 11
## KAT KLP KUN LAM MAT OKI PAL QNC SON SSK SUF SUU TAI TIR UTS YUN
## 6 4 4 9 12 12 11 11 3 12 6 6 7 4 12 9
Convert format
##
## - number of detected individuals: 314
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.geno"
##
## - number of detected individuals: 314
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
##
##
## - number of detected individuals: 314
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.lfmm"
PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 314
## -L (number of loci) 22537
## -K (number of principal components) 314
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: euro_native2_albania_croatia_greece_US.pca/
## input file: euro_native2_albania_croatia_greece_US.lfmm
## eigenvalue file: euro_native2_albania_croatia_greece_US.eigenvalues
## eigenvector file: euro_native2_albania_croatia_greece_US.eigenvectors
## standard deviation file: euro_native2_albania_croatia_greece_US.sdev
## projection file: euro_native2_albania_croatia_greece_US.projections
## pcaProject file: euro_native2_albania_croatia_greece_US.pcaProject
## number of individuals: 314
## number of loci: 22537
## number of principal components: 314
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 314
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.pca/euro_native2_albania_croatia_greece_US.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_albania_croatia_greece_US.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_albania_croatia_greece_US.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_albania_croatia_greece_US.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.92910 Americas BER 2018
## 2 Palm Beach USA 26.70560 -80.03640 Americas PAL 2018
## 3 Dubrovnik Croatia 42.60654 18.22661 Europe CRO 2017
## 4 Vlore Albania 40.46600 19.48970 Europe ALV 2020
## 5 Durres Albania 41.29704 19.50373 Europe ALD 2018
## 6 Tirana Albania 41.31473 19.83172 Europe TIR 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 3 NA 77
## 3 Southern Europe East Europe 31 23 23
## 4 Southern Europe East Europe 32 24 24
## 5 Southern Europe East Europe 33 25 25
## 6 Southern Europe East Europe 34 26 26
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 36 Levels: ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ ... YUN
## [1] 36
Check the regions
## [1] "North America" "Southern Europe" "East Asia" "South Asia"
## [5] "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 ALD 33.6545 1.847770 -30.1859 11.95910 -4.4866900 3.505150 0.860564
## 2 ALD 34.8737 2.012080 -33.8088 15.53010 -1.5636900 4.878090 1.074120
## 3 ALD 34.2735 1.827370 -33.3481 13.08000 -4.3894700 3.894920 3.086600
## 4 ALD 35.3477 1.389580 -33.3868 13.39940 -1.2816600 0.604832 0.331101
## 5 ALD 31.9907 0.623082 -27.3660 9.80694 -0.0119409 1.703960 -0.403977
## 6 ALD 32.9900 0.373753 -35.7319 11.88800 -0.3668650 2.408060 0.810319
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 -1.294480 -15.72440 10.57200 -7.57900 0.306803 1.626140 -7.51101 31.01680
## 2 -1.152290 -19.12860 11.94860 -7.17698 -2.854640 3.366370 -8.22972 19.83230
## 3 -1.591870 -15.82210 11.37940 -6.61994 -1.406800 1.335060 -6.13617 12.04520
## 4 -0.371745 -2.61902 -4.47102 -2.13649 -0.800955 -0.755948 1.60279 -4.44627
## 5 0.339965 -11.02060 5.71820 2.04755 -1.004190 2.191210 -5.82602 17.14610
## 6 -2.613870 -13.67560 6.68720 -4.27629 -2.585940 -4.477190 -11.55440 22.37860
## PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23
## 1 -2.48287 -4.706000 -6.46377 -7.771850 -13.96380 -9.27508 4.08026 4.49743
## 2 -3.93641 -2.762380 -2.79927 -2.907130 -14.86400 -8.93777 10.41130 12.39160
## 3 -4.35073 0.873146 -5.22129 -1.610730 -12.05890 -7.13241 7.42712 11.57910
## 4 1.08533 1.155160 3.67332 -12.226500 -10.59080 -6.95033 4.71994 4.02882
## 5 -5.18930 0.374034 -3.66742 0.169997 -5.76635 -9.85443 6.20592 2.31409
## 6 -5.32986 1.919290 3.33214 42.968000 32.35080 11.49690 -7.52173 -16.05490
## PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31
## 1 -7.941060 -8.91267 -2.05442 2.88808 1.55396 -1.53999 -1.64546 5.004950
## 2 -7.863100 -7.74425 2.14457 9.39956 -7.13180 -8.22522 -5.38359 6.276240
## 3 -11.627100 -11.28060 -2.42858 9.48418 -7.62069 -14.40570 -11.99440 11.077000
## 4 -0.419253 -6.92549 5.46390 6.14941 -0.75930 3.93828 -3.14222 0.856983
## 5 -17.710400 -1.36983 -4.04200 -4.07820 9.49657 1.08740 1.60221 -1.521270
## 6 3.340100 14.53330 -4.38522 9.39643 14.73800 6.23730 -7.50660 0.646253
## PC32 PC33 PC34 PC35 PC36 PC37 PC38
## 1 2.73399 2.12740 0.992012 -2.343770 -8.68481 0.830522 0.514518
## 2 4.07887 7.24171 4.992420 -1.836340 -4.50153 -0.606885 -0.540377
## 3 -0.32891 13.11460 2.855670 -7.541280 -2.87208 2.869660 -3.359010
## 4 10.13540 2.48688 4.170620 -0.426741 -11.20110 -4.764150 -11.483700
## 5 3.45475 4.04732 -6.953400 -7.382410 14.54970 3.170570 2.209260
## 6 -1.06384 -9.06869 14.345700 2.146430 -24.29830 1.265940 -0.156519
## PC39 PC40 PC41 PC42 PC43 PC44 PC45 PC46
## 1 4.878880 10.95370 0.911399 3.0333100 -1.27587 1.266400 6.08136 -2.20252
## 2 -0.401524 4.16874 -0.413553 3.2211400 4.31395 2.161870 1.76064 -2.25617
## 3 3.518950 4.13668 -5.858940 -0.0632954 -4.99706 5.550840 -3.20248 -1.21974
## 4 -5.583580 1.15464 3.412100 -5.8628200 -2.12944 9.625950 -2.92786 -6.79578
## 5 4.876490 4.13275 -6.373340 5.3835500 -5.84265 0.134066 13.88050 2.10961
## 6 -12.620200 3.76460 -5.818740 6.6802200 6.92779 0.182684 -1.45777 -5.49437
## PC47 PC48 PC49 PC50 PC51 PC52 PC53
## 1 0.715354 -4.884480 9.46724 -5.49459 1.245290 4.96518 5.8071400
## 2 2.666250 -1.928840 -6.92761 5.64964 -0.435540 -6.20389 0.5962470
## 3 -2.249120 -2.250060 -13.90950 3.60353 4.554800 -6.27148 0.0720639
## 4 20.923900 -2.710540 5.72659 -7.00088 -0.291436 -3.92261 5.1775100
## 5 0.893004 0.369954 8.99940 -17.95580 -7.204090 2.28608 16.4855000
## 6 -6.365360 -0.960018 10.47180 5.38179 -3.231840 2.33383 5.5777200
## PC54 PC55 PC56 PC57 PC58 PC59 PC60 PC61
## 1 6.13875 -2.78973 6.917520 3.64115 4.61293 1.538960 4.0828200 -2.77608
## 2 -5.50915 1.25105 5.983920 4.43292 12.86350 -3.601760 1.0253100 -8.86598
## 3 -19.74750 3.54473 19.259600 6.42519 14.73150 -1.314380 -3.5901200 -8.25783
## 4 6.29928 12.13010 -0.814202 -5.85231 1.87534 22.005700 3.3382100 14.37180
## 5 8.48926 -5.37201 -7.356940 -3.75368 9.53201 -0.890918 -4.8544300 -7.65264
## 6 0.94694 -2.19742 -3.469640 -2.18860 -3.81891 2.898470 0.0619372 -7.63990
## PC62 PC63 PC64 PC65 PC66 PC67 PC68
## 1 -7.49693 5.356770 1.414720 6.83071 1.72852 -2.965640 -2.183760
## 2 -2.24644 -0.672646 4.764640 1.77717 -4.59098 -4.130500 -0.918205
## 3 5.13453 -12.671400 5.495980 1.25184 7.66209 -9.235030 1.829610
## 4 -9.58395 8.561870 -5.654850 -15.71400 -11.09880 -3.875670 1.085360
## 5 -1.19577 2.130770 -9.225300 -2.20116 -2.03796 -0.723815 11.654900
## 6 -5.13762 5.392660 -0.120972 -0.18269 -6.98170 1.719570 2.028280
## PC69 PC70 PC71 PC72 PC73 PC74 PC75
## 1 1.192830 3.766200 0.622794 5.065640 -4.80467 2.94089 -3.245190
## 2 1.086950 2.528770 -5.799790 -3.321690 -3.55036 -7.34971 -1.050960
## 3 0.767803 0.559735 2.385130 -2.868020 4.38976 10.59250 5.077560
## 4 -7.096480 -4.754940 -3.892830 -10.667200 -3.24553 -2.24196 -10.083300
## 5 9.692490 4.236640 -4.310380 -1.038800 5.48001 -2.55295 5.165410
## 6 3.991450 0.350207 2.870570 -0.573346 2.13657 1.53847 -0.196955
## PC76 PC77 PC78 PC79 PC80 PC81 PC82
## 1 -0.0129554 0.0505564 12.68150 1.601140 -7.33282 2.788560 7.703260
## 2 -5.2097800 7.5951400 10.35960 -0.291698 2.16434 0.296412 -0.934301
## 3 -0.4583840 0.0416165 2.64185 -1.717290 3.63620 0.214529 -7.629770
## 4 -6.5937600 7.4637200 -6.78586 6.868990 7.03817 -2.634720 5.319950
## 5 2.9004200 6.7934100 -8.50726 -7.055580 11.34130 -3.577970 1.304170
## 6 -2.2571200 -3.4150700 -2.61168 2.072780 -2.94126 0.272714 -2.538370
## PC83 PC84 PC85 PC86 PC87 PC88 PC89
## 1 -9.866020 -4.31495 0.6777720 -5.51019 -6.924420 1.31937000 -6.27135
## 2 0.436573 -4.93466 2.8169900 -1.25202 -2.881640 -3.90148000 -5.87395
## 3 1.010660 -3.70488 -2.0284700 -3.73672 -2.211940 0.00993857 -3.52806
## 4 4.236790 -12.09670 0.0380884 -1.77589 4.614420 3.89719000 3.17119
## 5 7.152270 -8.92949 -1.0369200 8.31742 -5.384580 10.74410000 1.63387
## 6 -2.946150 1.43230 0.8272230 -1.73772 0.658895 5.02566000 -2.06215
## PC90 PC91 PC92 PC93 PC94 PC95 PC96
## 1 1.145260 -10.156500 -3.13334 -1.783820 -3.588170 -4.35788 7.1361700
## 2 -1.608050 2.109830 -2.38463 -1.962430 2.246590 -4.25180 -4.6616200
## 3 3.092150 -2.204930 -3.23484 -0.249128 7.762050 4.78274 2.0568400
## 4 0.407724 -0.451571 6.58912 -4.365150 -8.650070 -1.14843 -8.2902300
## 5 6.507940 4.727920 5.91863 4.992710 -0.342017 -1.48456 -18.1406000
## 6 -1.103690 5.434020 2.28493 -0.614166 -1.243970 -4.61006 0.0451897
## PC97 PC98 PC99 PC100 PC101 PC102 PC103 PC104
## 1 8.78241 -1.912010 -3.323490 4.93145 -10.537200 3.716780 -3.45889 -4.66733
## 2 2.91688 -1.175150 5.053420 3.14203 -6.980030 -3.292910 2.03871 1.98330
## 3 3.87105 0.136814 -3.648920 -1.23847 7.505590 2.325360 3.80764 5.79557
## 4 6.20959 -1.575340 10.112200 -1.60873 -4.491970 0.520230 -4.67215 -1.39170
## 5 6.59724 -2.519420 -2.018210 -2.76594 -1.344410 -5.595290 3.15003 4.51741
## 6 5.23598 -3.400290 0.370388 1.21478 -0.811618 0.276597 -1.02941 1.94314
## PC105 PC106 PC107 PC108 PC109 PC110 PC111
## 1 3.6164400 -3.621440 3.778800 -1.881240 0.4265100 -3.20713 0.943682
## 2 5.5188800 -2.850750 0.756583 3.277790 0.2377210 3.06538 -0.598388
## 3 2.8100700 9.656480 3.418960 0.493882 -1.0775100 3.91338 -4.807900
## 4 6.7866000 -0.581409 4.729670 -4.434640 -0.0155062 -1.20471 11.342700
## 5 -17.8347000 -14.263700 4.946380 4.077720 0.6006550 -8.46081 2.171890
## 6 -0.0202892 2.111370 -4.683750 -0.244502 -0.4694190 3.42689 2.672000
## PC112 PC113 PC114 PC115 PC116 PC117 PC118
## 1 4.650600 0.568530 5.8164100 -8.45628 -1.037270 -0.779554 1.309430
## 2 1.985000 0.853602 2.5881100 -2.08731 -2.665640 0.679625 -4.251210
## 3 -0.506207 -3.699900 -4.1380100 -2.26648 6.692060 -1.139400 2.083100
## 4 -7.456190 0.423242 4.1909500 1.79892 -3.504240 0.202567 2.130380
## 5 -1.378830 -7.304760 -0.0255998 4.65554 7.095080 4.593360 -3.398440
## 6 0.633097 2.289390 -4.2970600 -2.22632 0.232146 3.973770 0.172575
## PC119 PC120 PC121 PC122 PC123 PC124 PC125
## 1 -6.83216 -0.238747 -2.3355200 -6.26133 -9.03430 1.0777400 1.102470
## 2 -1.40493 -5.859750 -0.6081220 -6.28296 -3.55222 -0.3966300 1.668450
## 3 1.48528 -0.877686 0.6227140 6.41087 2.76417 0.8582210 1.009050
## 4 15.06220 -2.202070 -2.6446100 -2.16875 1.00702 11.1506000 -5.773470
## 5 3.61705 6.696050 0.0467786 1.19695 5.11210 11.1839000 -11.471500
## 6 -2.73901 0.487846 1.1426000 2.56311 2.86882 -0.0639249 0.696654
## PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 1 -3.264220 2.49648 2.71035 -8.39245 5.29770 -0.604751 0.0319948
## 2 -1.615450 -1.42929 -2.63967 4.49418 1.57293 1.928240 -2.9189900
## 3 6.307040 3.51173 3.87306 1.50706 -1.24510 1.392200 3.1599800
## 4 -1.118350 -2.48191 5.86796 3.93594 1.18478 -2.317890 -6.7275100
## 5 -0.769771 -6.14800 -6.02020 1.90101 -1.86867 -9.847430 1.9986900
## 6 -1.714860 1.23419 -4.09984 1.65576 -2.69937 2.469880 2.5896300
## PC133 PC134 PC135 PC136 PC137 PC138 PC139
## 1 -0.00476579 5.396020 2.611530 -0.328788 0.175361 -4.91154 -2.362130
## 2 -4.04194000 -1.379200 0.117704 4.871980 -2.606570 1.76225 -3.363510
## 3 0.41838800 0.375410 4.759850 2.854450 -4.663850 2.03150 5.358980
## 4 -1.30709000 4.628990 -1.042330 -5.263840 -1.519600 3.19468 0.663090
## 5 11.21110000 1.863470 -1.545080 0.652939 1.649230 -3.61138 -0.299006
## 6 -0.21671300 0.743186 2.001340 1.809520 -0.341460 -5.32700 0.773553
## PC140 PC141 PC142 PC143 PC144 PC145 PC146
## 1 -6.396970 -1.67544 2.895390 2.192780 4.327200 -5.081270 -3.47263
## 2 3.239580 -5.49091 5.059210 -8.234230 -0.621355 0.906740 -1.06434
## 3 4.073650 3.54339 -1.613140 7.888170 5.158350 -0.368051 -1.46049
## 4 1.624300 -0.96298 5.114330 0.541375 -9.324060 4.952700 -4.09967
## 5 1.563700 1.84025 2.954910 -5.051490 -8.961600 -10.829700 4.28655
## 6 0.315918 2.70990 0.907912 -1.555190 -0.523389 0.646551 1.10398
## PC147 PC148 PC149 PC150 PC151 PC152 PC153
## 1 -7.4019400 -0.085435 2.1186700 2.42665 0.850691 6.17052 4.9082400
## 2 -6.1629700 1.479820 -6.0286100 5.07042 -1.528910 1.71706 1.7883000
## 3 -0.6345720 -0.779144 0.0375354 -4.07633 1.652520 -1.62459 -1.2402500
## 4 -1.2463100 2.824750 5.6019900 2.88964 0.326503 3.09846 5.8434700
## 5 1.9688900 5.211240 -0.1552900 -3.11523 -3.405690 -7.51049 -9.3838200
## 6 0.0573525 3.712280 -1.5083900 1.30896 0.358595 -3.18367 -0.0334206
## PC154 PC155 PC156 PC157 PC158 PC159 PC160 PC161
## 1 -1.69769 12.751200 -1.343380 7.72398 -5.795900 -10.400700 -3.71151 -4.729100
## 2 -3.39364 6.417370 -5.319130 1.00849 2.560790 7.208550 -7.04635 4.689130
## 3 -1.40850 0.903666 -7.892730 3.00922 -3.642500 4.311480 7.35473 -0.321652
## 4 -2.54236 -1.151910 2.195310 3.45469 0.929473 -2.629330 7.35077 -4.724400
## 5 1.26893 -1.233260 1.084270 -7.30525 6.647590 -0.758297 -8.51190 -0.609865
## 6 -4.09577 -0.159922 0.233928 1.69582 4.161510 5.042550 2.35631 -1.284000
## PC162 PC163 PC164 PC165 PC166 PC167 PC168
## 1 -5.447390 2.201770 -0.0248024 -9.807610 -8.331380 2.93591 1.68750
## 2 2.198770 -3.485170 3.1277400 -10.353700 2.718670 2.76854 -3.49151
## 3 1.414800 1.948990 -1.3431700 1.076840 6.111980 -5.86223 4.33017
## 4 -1.014150 -4.131350 5.3142300 -2.862470 -6.931190 6.11905 1.52432
## 5 -4.819830 0.327660 -8.3835400 9.995780 -0.727753 2.26051 -4.25010
## 6 0.864983 -0.301886 0.9082320 -0.218509 4.348150 -1.98231 1.49800
## PC169 PC170 PC171 PC172 PC173 PC174 PC175 PC176
## 1 -3.30902 6.371800 7.503040 -4.195200 11.637700 6.88266 0.837633 8.369330
## 2 4.53768 3.450040 -2.112630 -2.179480 0.109162 -1.93880 -5.196740 -4.097020
## 3 2.69640 -2.306310 6.697740 4.062030 -1.173660 4.34290 -2.586690 -4.394000
## 4 -2.10771 -3.178930 -3.468040 -2.929220 -1.594560 5.58657 -0.110492 0.460886
## 5 -7.87725 -0.359289 -8.879580 -0.999904 10.144000 -6.52697 -0.201275 1.116080
## 6 -0.17832 -2.886290 0.131796 -2.520810 -3.283840 0.51122 -0.870987 0.438179
## PC177 PC178 PC179 PC180 PC181 PC182 PC183 PC184
## 1 -10.78100 5.979880 0.306682 -2.936590 4.932730 1.75031 -0.583489 1.14002
## 2 4.27266 0.121533 -7.721210 2.301580 -0.538046 2.58522 2.494530 1.62119
## 3 3.18118 -1.431240 1.373830 5.654720 2.734180 -4.02361 -0.945256 -1.33957
## 4 -1.63536 9.507150 -2.250480 0.297913 -4.907280 1.42801 1.772740 -6.59006
## 5 -5.69393 5.592520 2.322930 -5.613610 7.529220 -4.21566 3.385570 4.78116
## 6 2.47028 4.040490 0.961758 -1.334240 6.868490 2.88158 -1.996070 -3.50053
## PC185 PC186 PC187 PC188 PC189 PC190 PC191
## 1 3.133300 3.199240 -4.3345700 0.682097 5.680720 1.5806700 -0.962661
## 2 2.885220 -0.103630 -4.1233500 4.916050 1.006000 10.3081000 3.801520
## 3 -6.638540 3.729430 1.1629700 -0.875242 -8.670040 -0.0632216 -0.411967
## 4 0.178439 0.341242 -0.7702740 5.564390 -5.282250 -3.1110000 0.207438
## 5 -1.714870 -4.521330 0.8891020 -7.905830 -0.191294 -7.9036900 0.200192
## 6 0.901250 -1.863460 -0.0184047 1.450870 -0.657083 2.2024500 0.887188
## PC192 PC193 PC194 PC195 PC196 PC197 PC198 PC199
## 1 7.061020 -8.33520 -0.302678 0.647908 -3.35035 -8.25312 8.361610 -6.014980
## 2 -5.814570 4.67226 2.321640 -6.180880 -1.99485 -6.17516 1.487010 -9.180200
## 3 1.495610 3.27276 -8.704910 5.037340 9.93779 -2.37767 2.042930 2.851730
## 4 0.794471 -8.16763 0.454237 6.753870 -1.08893 5.28204 5.740390 -1.778770
## 5 2.372750 -1.10369 13.818600 5.244370 2.67536 2.80470 -5.927800 7.338330
## 6 -2.665490 6.94193 -0.591069 -5.780620 -1.84512 -2.18502 0.484737 0.690277
## PC200 PC201 PC202 PC203 PC204 PC205 PC206 PC207
## 1 -6.931610 -4.586820 -4.564240 2.126820 3.10813 3.53048 8.388060 7.67917
## 2 -1.576450 1.869260 -0.937083 1.492150 -5.71744 6.39860 4.218390 -1.81072
## 3 1.233270 -6.941700 -3.707590 -6.891040 -2.04340 -7.37748 -2.193030 -3.10757
## 4 1.209880 -2.477590 -1.762960 0.835563 -0.21097 1.39658 0.392192 2.91430
## 5 4.525660 0.686656 0.375180 4.172130 -1.76877 -3.61801 6.061070 -5.40620
## 6 0.097467 -3.098260 5.006010 -0.805526 -2.75955 -3.50576 0.277770 -1.02450
## PC208 PC209 PC210 PC211 PC212 PC213 PC214 PC215
## 1 -7.96133 1.617330 -2.793220 0.711674 4.924160 9.7029700 -5.84028 -2.61507
## 2 9.81179 7.525790 5.887680 4.568250 3.662950 6.1630100 5.38136 0.53293
## 3 -6.73944 0.461928 0.121859 -2.041100 -2.615150 -0.0967062 2.54300 -2.28002
## 4 -4.03631 2.789960 -2.504020 8.738590 5.403160 -3.6337300 7.38863 8.21427
## 5 8.72548 -6.731350 -8.032340 -1.955300 -0.959485 -6.8370900 -8.42257 -7.74669
## 6 1.84056 1.573760 3.382260 -0.328414 -1.186070 0.8472400 2.57702 1.19381
## PC216 PC217 PC218 PC219 PC220 PC221 PC222
## 1 -4.05649 -1.400540 -5.2599400 11.66840 -11.990400 -2.59632 -4.280710
## 2 7.24621 -0.947783 2.4520900 -3.50296 -0.797493 -14.54210 -8.040200
## 3 -5.33901 3.356270 -0.0312963 -11.05110 2.600810 4.51269 0.935934
## 4 6.30084 -2.807530 7.0508700 5.31521 -6.573740 2.61616 1.336970
## 5 13.14020 -1.417840 -13.5184000 -1.07235 -4.257080 4.63861 -0.014990
## 6 1.13016 -5.947810 2.5275500 -0.87560 2.747300 5.81640 2.835400
## PC223 PC224 PC225 PC226 PC227 PC228 PC229
## 1 1.94251 5.891960 -0.449854 3.92675 2.794570 11.87960 6.13497000
## 2 -1.11754 3.872850 8.925680 -8.55261 9.333840 -1.28196 10.33650000
## 3 11.25670 1.421700 -11.232400 3.01496 6.800130 4.60103 -1.33927000
## 4 -2.83960 4.244050 6.280090 5.59610 1.874810 -1.72194 -5.03377000
## 5 -12.46080 -5.103430 3.664240 -7.43294 -6.226510 1.61341 -1.64098000
## 6 -1.21133 0.921029 -1.758570 -1.27298 -0.214518 -2.59506 -0.00348635
## PC230 PC231 PC232 PC233 PC234 PC235 PC236
## 1 -2.80267 -7.83308 8.85412 9.1812000 -1.254760 6.2623100 16.05450
## 2 19.19680 -11.41270 -1.09810 -5.5716400 -1.405510 3.7062700 -7.10752
## 3 -8.82161 4.70603 7.07786 -0.0711724 1.705160 -4.2771400 -3.51692
## 4 9.41425 9.06451 7.58435 -5.4198800 -1.647100 5.5584300 3.19031
## 5 -10.67500 -3.73103 4.58325 11.7961000 -6.267800 -0.0855602 -6.33932
## 6 -2.50674 2.88390 5.19936 0.1536130 -0.485274 0.0751213 -2.45290
## PC237 PC238 PC239 PC240 PC241 PC242 PC243
## 1 -1.8641200 -0.431303 -7.267520 -9.430690 -9.013110 10.631200 -1.764860
## 2 1.6818200 -3.470010 0.157354 -1.640930 -3.601660 -0.992202 -1.892720
## 3 -0.0640565 -5.376040 -7.815560 -7.395830 -2.348790 1.930630 0.963152
## 4 7.0473000 -5.052070 -1.325840 -1.970020 -0.931347 -5.650630 6.926360
## 5 11.8571000 0.748320 -6.559410 1.255750 2.173390 1.652140 6.927640
## 6 -2.9325900 -2.290630 -7.875850 -0.830932 -4.696940 3.204330 0.256871
## PC244 PC245 PC246 PC247 PC248 PC249 PC250
## 1 -11.902600 1.91390 -1.37661 15.236400 4.64691 -0.287894 2.80942
## 2 -0.269483 8.59781 -7.95790 -24.016300 -2.45026 -6.486090 -10.06590
## 3 6.695980 -5.20846 -4.87823 -3.626420 -3.16881 6.738720 5.11283
## 4 12.496000 -4.74525 8.57286 -3.619320 -8.54000 7.208910 -7.90051
## 5 -0.745293 2.93907 1.85637 -1.097090 1.36290 -1.547450 -2.38768
## 6 3.414920 -4.00801 -3.18730 -0.303565 -3.15522 3.801380 -2.45670
## PC251 PC252 PC253 PC254 PC255 PC256 PC257
## 1 18.34910 6.021330 -1.75432 14.983700 4.642250 8.69091 -0.669913
## 2 -16.37770 -12.183600 12.46670 -5.029640 -12.460200 -20.54960 1.190570
## 3 -2.60577 6.802720 0.21410 6.770780 -4.476870 3.22644 -1.509920
## 4 -9.78929 15.557500 -15.83700 -12.138300 -3.103950 7.62455 -3.339510
## 5 -2.49837 0.984903 3.65400 3.357680 0.206083 -4.79476 3.970740
## 6 -3.27229 -0.948397 4.13363 0.582239 -0.279581 3.39691 0.180836
## PC258 PC259 PC260 PC261 PC262 PC263 PC264
## 1 -3.30789 -2.3865600 3.374870 2.667370 4.798790 0.62135 2.703780
## 2 2.79875 0.1673550 -7.790910 2.164710 -0.518439 -4.98390 -0.884782
## 3 5.23692 -0.0362197 -4.863860 4.267020 -4.548640 5.31758 -1.741960
## 4 -2.43300 -1.6650700 15.606300 2.924460 -11.917900 -0.21808 1.082820
## 5 5.09175 1.2463300 -4.538360 1.354460 -1.329800 -1.58370 -2.255690
## 6 -1.13881 -1.3954600 0.408745 0.716955 -0.532828 -1.33913 0.801797
## PC265 PC266 PC267 PC268 PC269 PC270 PC271
## 1 1.649910 -0.0200100 2.177510 -3.3099700 1.992740 1.927030 1.1373600
## 2 5.105700 1.8963400 -7.360250 0.7860970 1.227000 -4.218570 3.3524400
## 3 -6.865580 6.7687000 12.324800 0.4335670 0.266546 1.411440 -10.1173000
## 4 -3.427430 -5.1275700 1.426160 0.0933183 -2.021640 4.516900 -0.0350495
## 5 2.392090 0.0928426 2.462270 -0.1195320 -0.392513 1.111330 -0.7518710
## 6 -0.170483 1.4088800 -0.299909 -0.7428170 2.715280 -0.955953 0.7243900
## PC272 PC273 PC274 PC275 PC276 PC277 PC278
## 1 -1.275740 -2.48432 5.20039 -2.861720 1.715530 -1.039670 -3.764230
## 2 -0.739302 1.69314 7.49680 -3.132360 1.161560 0.920478 -3.508810
## 3 10.122500 11.51660 -22.89080 4.621690 -2.174330 -8.311050 13.397500
## 4 0.863329 2.40725 -1.75043 2.406890 -0.203735 -1.697610 3.169310
## 5 3.764930 6.72967 -4.72925 0.524955 -1.068460 -1.648630 1.862390
## 6 -0.965874 -1.33428 1.68967 0.438172 -0.258886 -1.323900 -0.575133
## PC279 PC280 PC281 PC282 PC283 PC284 PC285
## 1 3.872290 2.653390 0.9812440 1.369250 0.9536940 -1.4131500 1.296170
## 2 0.762351 -1.768900 0.0420279 0.807682 -0.3029830 -0.7922200 0.970225
## 3 -7.084490 -0.756776 1.0913600 -1.590870 -0.9827410 4.7706700 -9.088740
## 4 -3.343070 -1.896350 0.4949320 -1.056020 0.3940330 1.2254800 -0.808598
## 5 -0.774658 1.337520 -3.7078600 1.433060 -0.0421016 0.0227108 -1.267880
## 6 -1.375490 -0.560864 2.6851600 -2.156240 0.7733240 0.0510981 0.533246
## PC286 PC287 PC288 PC289 PC290 PC291 PC292
## 1 1.408590 -0.692493 -1.692940 -0.508341 1.3155600 1.09893 -0.0469909
## 2 1.358240 -1.295590 -0.961377 -0.888247 -2.6110500 -1.79874 -0.3693740
## 3 -0.883345 0.656211 1.236220 -1.339480 0.9317510 2.41135 1.4819400
## 4 0.618072 2.211330 0.284783 0.238092 0.0885846 -1.28587 1.2646700
## 5 0.156034 -0.429487 0.074908 1.610000 0.3284930 1.19665 -1.4438900
## 6 0.568228 0.591674 -1.437410 0.723332 -0.5755580 0.31937 -1.0081300
## PC293 PC294 PC295 PC296 PC297 PC298 PC299
## 1 -0.4883560 0.182678 -0.1851750 -0.2663890 -0.00320062 0.205618 -0.698171
## 2 -0.6974270 1.594630 0.5744470 -0.6161530 0.09362940 -1.101430 1.076890
## 3 2.9971100 -2.384980 0.0578765 -1.0670000 -0.27161200 0.949054 -0.522237
## 4 -0.0437913 -1.023070 -0.6816160 0.5671030 0.20787300 -0.917020 -1.890380
## 5 -0.9559760 1.207940 0.2997290 0.9107150 0.27095500 0.473866 1.609970
## 6 -1.4407400 -0.192525 1.6442300 -0.0874211 1.08679000 0.187795 -0.667890
## PC300 PC301 PC302 PC303 PC304 PC305 PC306
## 1 0.4137050 0.5621150 -0.485694 0.257417 -0.2055540 0.1367620 -0.2688380
## 2 -0.6001580 0.6394160 -0.149942 -0.583650 1.2917800 -0.5095320 0.5222870
## 3 0.5338530 -0.3134630 1.039260 -0.670045 0.2227220 0.4046490 -0.9773220
## 4 0.0682799 -0.0738791 -0.316639 0.165937 0.2978430 0.0403902 -0.0669559
## 5 -0.1535990 -0.6139670 -0.632838 0.220250 -0.0752629 -0.7223190 0.2742670
## 6 0.8562820 -1.9606300 -0.304926 0.285378 0.0572905 0.7771180 -4.8020200
## PC307 PC308 PC309 PC310 PC311 PC312 PC313
## 1 -0.258987 -0.179910 0.336991 -0.382592 0.025427 -0.147654 0.0352362
## 2 -0.765504 -1.110230 -1.812130 -0.340148 -0.294537 -0.184055 -0.0485052
## 3 0.308184 0.203520 -0.519026 -0.142370 -1.055930 0.441421 -0.1585510
## 4 0.382680 0.193674 0.826433 0.736911 0.221377 0.174860 0.1945740
## 5 0.269982 0.224447 -0.584473 -0.540086 0.811890 -0.276770 -0.1835570
## 6 -2.782440 10.372100 31.352700 -0.486031 7.850800 -0.217323 -0.3897350
## PC314 Individual Pop_City Location Latitude Longitude Continent Year
## 1 1.29438e-06 801 Durres Albania 41.29704 19.50373 Europe 2018
## 2 1.29438e-06 802 Durres Albania 41.29704 19.50373 Europe 2018
## 3 1.29438e-06 803 Durres Albania 41.29704 19.50373 Europe 2018
## 4 1.29438e-06 804 Durres Albania 41.29704 19.50373 Europe 2018
## 5 1.29438e-06 805 Durres Albania 41.29704 19.50373 Europe 2018
## 6 1.29438e-06 806 Durres Albania 41.29704 19.50373 Europe 2018
## Region Subregion order order2 orderold
## 1 Southern Europe East Europe 33 25 25
## 2 Southern Europe East Europe 33 25 25
## 3 Southern Europe East Europe 33 25 25
## 4 Southern Europe East Europe 33 25 25
## 5 Southern Europe East Europe 33 25 25
## 6 Southern Europe East Europe 33 25 25
genotype <- here(
"euro_global/output/neuroadmixture/native_far_east_euro2.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 347
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 347
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KER KLP
## 12 10 12 12 12 2 12 12 4 7 12 11 4 2 6 12 11 6 12 4
## KRA KUN LAM MAT OKI QNC RAR SEV SOC SON SSK SUF SUU TAI TIK UTS YUN
## 12 4 9 12 12 11 12 12 12 3 12 6 6 7 12 12 9
Convert format
##
## - number of detected individuals: 338
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.geno"
##
## - number of detected individuals: 338
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
##
##
## - number of detected individuals: 338
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.lfmm"
PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 338
## -L (number of loci) 22537
## -K (number of principal components) 338
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: native_far_east_euro2.pca/
## input file: native_far_east_euro2.lfmm
## eigenvalue file: native_far_east_euro2.eigenvalues
## eigenvector file: native_far_east_euro2.eigenvectors
## standard deviation file: native_far_east_euro2.sdev
## projection file: native_far_east_euro2.projections
## pcaProject file: native_far_east_euro2.pcaProject
## number of individuals: 338
## number of loci: 22537
## number of principal components: 338
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 338
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.pca/native_far_east_euro2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Sevastopol, Crimea Ukraine 44.54125 33.51400 Europe SEV 2021
## 2 Alushta Ukraine 44.68289 34.40368 Europe ALU 2021
## 3 Kerch, Crimea Ukraine 45.35246 36.47015 Europe KER 2021
## 4 Krasnodar Russia 44.95504 39.02782 Europe KRA 2017
## 5 Sochi Russia 43.60042 39.74533 Europe SOC 2021
## 6 Tikhoretsk Russia 45.85460 40.12560 Europe TIK 2021
## Region Subregion order order2 orderold
## 1 Eastern Europe East Europe 42 34 34
## 2 Eastern Europe East Europe 43 35 35
## 3 Eastern Europe East Europe 44 36 36
## 4 Eastern Europe East Europe 45 37 37
## 5 Eastern Europe East Europe 46 38 38
## 6 Eastern Europe East Europe 47 39 39
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 37 Levels: ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC ... YUN
## [1] 37
Check the regions
## [1] "Eastern Europe" "East Asia" "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6
## 1 ALU 20.4856 -0.305611 -2.550140 -7.28735 -24.20470 6.38975
## 2 ALU 25.3823 4.337260 -0.388604 -5.43395 -20.36060 8.95141
## 3 ALU 20.0237 2.626390 -0.786929 -10.38780 -29.93820 8.93216
## 4 ALU 25.8712 -3.342340 -1.110070 -2.41729 2.61012 -3.53675
## 5 ALU 30.0675 -11.041700 0.724509 -4.64010 -5.47538 9.77449
## 6 ALU 27.3772 -2.688700 0.959256 -1.42843 -1.32056 -4.38881
## PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14
## 1 -15.04960 6.67978 0.151167 38.2655 -6.15882 -8.821650 -24.86590 -3.87666
## 2 -12.67250 4.17658 -0.154637 31.6130 -3.83496 -1.228910 -8.24218 3.89523
## 3 -19.74160 8.25617 1.521170 44.7494 -1.77796 -3.928850 -15.47780 4.22217
## 4 6.09960 4.60456 0.107691 31.9499 6.25326 0.350372 5.77296 12.58190
## 5 -7.78187 2.21053 0.984531 24.2537 9.42648 -2.278880 1.63767 -27.95230
## 6 2.18008 1.82266 0.788612 27.3708 -2.65027 -0.994772 -4.55721 -17.14900
## PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 -6.17760 14.824700 -9.01089 7.14638 13.722300 8.52422 7.00553 -3.77419
## 2 -4.40492 -0.229220 -9.98679 -5.77431 -4.806080 17.98710 19.70860 -9.18956
## 3 -6.60865 5.091250 -17.67700 2.63932 0.991916 10.78830 11.84630 -11.81950
## 4 -5.58323 -3.184320 -18.61840 1.67256 -4.746580 14.92690 17.16900 -7.24129
## 5 -2.96934 -8.900110 21.58010 -29.60820 -15.574500 1.08231 -5.34274 8.00136
## 6 -1.06223 -0.968945 13.54250 -20.00430 -14.981500 5.18956 1.51858 10.53780
## PC23 PC24 PC25 PC26 PC27 PC28 PC29
## 1 -1.45222 -3.332710 -4.2012100 -9.21728 4.76234 -0.891124 5.2582300
## 2 -11.60540 15.355200 -4.0634100 -1.13517 1.33338 14.433700 -3.7651200
## 3 -9.34223 8.623670 -3.7577200 -6.51587 5.63725 6.830240 0.0136676
## 4 -6.96422 9.800260 -10.1645000 -13.08660 12.53640 -2.196250 -8.5672800
## 5 7.71842 -9.136000 0.0228503 -9.11044 10.12690 -2.903270 1.2629200
## 6 3.21686 0.968363 -5.4992600 -2.00759 8.71201 -3.956690 -10.9915000
## PC30 PC31 PC32 PC33 PC34 PC35 PC36
## 1 -11.54270 0.105329 -5.48342 -6.168920 -2.445510 2.38026 -18.168900
## 2 -5.08792 -3.830590 -3.33753 4.723430 -2.689220 4.57595 6.566870
## 3 -10.27700 1.422650 -7.00011 -6.931120 -5.608190 11.83910 -6.569350
## 4 -4.63713 13.878400 -7.89427 -0.547714 -4.742410 7.84252 -6.930040
## 5 -17.04900 -11.385000 1.81386 2.792030 3.853960 -13.36160 3.102920
## 6 -1.97113 11.571500 3.23825 1.899260 -0.426233 -4.60796 0.468608
## PC37 PC38 PC39 PC40 PC41 PC42 PC43
## 1 0.638639 5.96370 1.638110 -2.702090 26.01240 2.872620 -6.5748000
## 2 2.467160 -0.13457 2.271750 -0.695783 -25.92740 3.957250 9.0044100
## 3 2.511160 -1.24808 0.509379 6.589810 -1.63697 4.799590 -0.0233594
## 4 2.121700 -1.87570 1.823560 2.841060 2.42338 0.418901 -11.3373000
## 5 2.344010 -3.17460 7.938730 0.777851 -1.85059 -5.698850 14.5095000
## 6 5.034230 4.93773 10.499000 -15.493800 3.24924 5.950570 14.8428000
## PC44 PC45 PC46 PC47 PC48 PC49 PC50 PC51
## 1 16.89850 1.788520 5.62376 8.75830 7.335620 3.93678 -7.88436 -0.00527219
## 2 -13.97630 0.418266 -4.15255 -5.67872 -9.702130 5.36903 8.58153 6.94867000
## 3 2.98640 -1.076080 1.37242 -1.21517 -0.602785 2.66775 -3.25599 8.65494000
## 4 5.15674 3.924090 4.79590 4.03105 10.211600 1.34531 -9.18752 2.72248000
## 5 -1.09257 -2.114320 -2.75617 3.44852 0.473827 0.79640 -6.17943 3.01706000
## 6 -1.87532 -11.292000 -3.75054 6.02935 6.197380 3.29759 -3.60343 -0.55713100
## PC52 PC53 PC54 PC55 PC56 PC57 PC58
## 1 2.340900 12.99150 8.848370 -1.60283 -20.37070 -0.376401 11.458700
## 2 -4.383410 -9.85162 -11.181800 3.11030 26.42630 -1.008880 -18.740200
## 3 -6.471730 -0.49460 -0.560385 4.99467 -1.27009 1.606140 0.764857
## 4 -1.318270 -3.21074 0.506548 1.69150 -9.63551 -1.613160 -6.314400
## 5 0.859159 5.97654 2.418050 -2.52355 -5.83637 3.143980 7.944640
## 6 4.578160 1.21431 6.930270 -10.54760 3.27519 8.432160 9.756380
## PC59 PC60 PC61 PC62 PC63 PC64 PC65
## 1 12.856000 -3.76451 3.046620 3.70025 -1.03461 1.963410 -4.801280
## 2 -4.022410 11.09840 -2.703890 -2.78526 6.80457 -1.793960 1.971330
## 3 -6.825280 1.63018 3.468500 -2.68255 -3.66434 7.675350 -1.717530
## 4 2.388530 -8.23049 1.187890 13.60900 -8.75254 -7.880010 8.460010
## 5 7.744390 3.05013 1.542050 -6.87117 9.60592 -1.401980 -7.486110
## 6 -0.982531 -1.11583 0.763478 10.81760 16.40710 -0.705895 -0.986492
## PC66 PC67 PC68 PC69 PC70 PC71 PC72
## 1 1.1013800 6.9687800 0.677785 -14.196900 1.932510 -2.730120 0.852579
## 2 0.0759335 -13.2050000 1.579390 17.570800 0.696840 -5.388440 1.549200
## 3 9.6270600 -0.2945390 -3.152270 -0.185804 -1.788710 2.772100 -1.140440
## 4 -19.0307000 0.0222812 13.416700 -9.458960 3.892460 -5.184720 -5.684060
## 5 -6.0241000 4.3416200 2.450220 0.687401 0.928587 -0.543575 -1.444860
## 6 -13.5157000 2.9042000 19.716400 -6.014460 -3.473160 -3.559860 -1.434370
## PC73 PC74 PC75 PC76 PC77 PC78 PC79 PC80
## 1 -0.0272235 0.442396 -0.734162 1.60670 8.34790 -1.57096 -2.325790 -2.66391
## 2 3.7382100 3.371150 1.717450 2.09539 -12.74000 -1.72072 4.329370 -2.44019
## 3 -3.2626400 -3.607260 -3.840580 -2.75547 6.71369 1.34046 0.637695 2.15170
## 4 0.7048390 -3.747130 1.718730 4.18192 -3.58733 4.14767 -4.507960 -5.19651
## 5 -5.9070400 -4.062740 -4.659250 -1.10407 3.31766 2.18593 -6.823190 -4.00661
## 6 6.0514700 -4.166360 -4.344890 -7.77497 4.97026 -1.38811 -3.049480 1.18698
## PC81 PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 1 -6.443170 1.83956 -4.31690 3.162300 3.88469 2.71503 1.53758 3.58843
## 2 -0.199465 3.38909 -2.99189 3.244740 -1.58022 3.20976 1.88266 6.84259
## 3 -1.699530 -3.79007 4.17300 7.029760 1.97388 -3.85027 4.20135 -6.80635
## 4 -18.767000 5.48438 -9.84728 5.582990 2.23714 4.16893 -10.36050 3.19901
## 5 9.240860 -1.70624 3.38226 2.525090 -3.98731 2.47006 4.15028 -8.34635
## 6 -6.014610 -8.22647 -13.41560 -0.858546 -3.22604 8.85529 -10.47330 -4.72417
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## 1 2.9237700 -1.174850 -1.771270 2.770570 0.475811 2.639810 0.441089
## 2 -1.8059400 4.373540 -12.713400 0.889759 -1.552400 -0.109804 4.667540
## 3 -10.5783000 -6.958860 10.898300 -1.400270 0.583719 -0.996475 -1.975080
## 4 2.2166400 -0.260060 -6.463720 7.555240 -4.397560 0.371583 -2.671060
## 5 0.2608440 -5.346590 0.409559 1.481070 -2.083320 4.788510 3.257690
## 6 0.0814059 -0.702711 -8.225600 2.683760 -8.159570 2.908500 1.401030
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 1.871160 -5.345640 1.123030 3.8324100 4.429460 3.851840 -0.684432
## 2 -0.947024 -3.227110 -0.485948 1.7780700 -0.494879 2.156390 3.148300
## 3 0.877382 2.141940 -3.737950 1.3161900 -3.770210 1.886970 -1.763700
## 4 1.246670 -0.479822 0.900249 -7.1356500 1.164820 1.714810 1.542460
## 5 -4.474370 3.739800 -0.491935 0.0339354 1.290120 0.387799 -0.703330
## 6 -1.487730 8.175420 6.974200 -6.9411800 1.599830 -12.971100 5.138090
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 2.25127 1.52242 1.906900 0.3036580 2.60092 -0.43695 5.143180 0.670442
## 2 1.61509 2.50924 -0.372587 1.5369000 -5.25799 1.23955 2.456150 4.775800
## 3 -5.46988 -1.75623 1.980140 0.9970790 1.69452 -8.97879 0.148475 -3.022270
## 4 -3.20834 -2.54959 0.729329 0.1442540 1.59751 4.16221 -3.662630 3.966310
## 5 -1.75826 -3.16704 3.781260 -0.2276980 2.71692 -2.47770 3.644330 3.318670
## 6 -0.15957 -1.73146 4.174510 -0.0224256 2.94150 5.84921 1.235730 -1.103880
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 0.233544 1.491500 -2.291850 2.75724 4.884290 -5.246880 -0.804267
## 2 -2.612870 1.070640 3.629040 -1.37871 -4.767060 -0.517355 -5.131480
## 3 1.307540 0.255413 -0.929420 3.97817 1.340020 3.841250 2.142500
## 4 -5.926810 -4.381290 0.525513 -4.22072 8.663280 -0.735895 -11.334500
## 5 6.378110 1.025500 4.854340 3.27895 -3.870160 -5.524730 8.700350
## 6 -1.616950 -5.279780 -2.855310 -11.38320 -0.243223 -3.058180 -7.078780
## PC118 PC119 PC120 PC121 PC122 PC123 PC124 PC125
## 1 -2.10701 3.333620 -2.215670 1.317990 -1.266080 0.190247 -3.89487 -2.80235
## 2 -4.94447 5.394510 -0.399604 0.336776 -0.395207 3.196520 -4.65527 2.98828
## 3 11.90080 -3.439220 3.095730 -0.587585 2.156610 1.273720 -3.38204 -2.57005
## 4 -5.42009 3.225020 -5.246600 3.407710 -5.493770 -5.074700 3.36216 3.98967
## 5 3.73444 -5.657550 1.020350 1.996580 1.535220 1.972160 -2.88932 -8.43176
## 6 -2.36509 0.662008 -8.206090 4.289750 2.361380 -5.375370 3.63467 5.10431
## PC126 PC127 PC128 PC129 PC130 PC131 PC132 PC133
## 1 3.471030 3.016630 -3.925310 -2.14323 -2.687760 -0.433259 3.58807 1.525600
## 2 -4.326420 3.231160 -0.718613 -1.38307 -0.170312 -0.374122 -4.82892 2.103600
## 3 -3.125980 -0.896061 -1.163370 1.41482 0.493408 1.832260 3.01200 0.647162
## 4 0.802205 -4.291240 -5.576690 0.97485 1.256250 -1.065740 7.41296 0.175257
## 5 -4.921190 0.742270 -5.490360 -1.96823 0.305370 3.665320 -5.13845 -2.557740
## 6 -6.691420 3.812800 -0.800976 5.76665 -2.657830 2.180710 1.69836 -8.661120
## PC134 PC135 PC136 PC137 PC138 PC139 PC140 PC141
## 1 4.204800 3.50766 -3.622220 -3.061660 4.27550 -1.23560 -5.24803 -0.523196
## 2 2.249160 7.33844 -0.255614 -5.768620 5.60840 -1.66117 -2.77480 0.705456
## 3 -0.632887 -2.77590 -2.518710 0.322666 -5.57880 5.30426 3.46215 -3.504200
## 4 -8.758330 1.69746 4.803670 0.639076 6.57224 -2.78041 -9.07109 1.242930
## 5 -3.477590 -3.41453 -6.135030 -3.890220 1.14685 3.50326 3.22089 -4.223740
## 6 0.188458 6.80750 4.682540 -2.105050 -4.82354 1.69971 3.74137 -2.142900
## PC142 PC143 PC144 PC145 PC146 PC147 PC148
## 1 -0.173200 -0.775339 4.46472 -1.424750 -2.2181000 2.490170 1.419960
## 2 5.600500 -6.012330 5.26934 -3.267700 0.0852875 1.049250 -1.768620
## 3 0.320618 1.420100 -7.04487 0.232887 3.3146700 -3.616300 1.685780
## 4 -1.607800 0.241212 -4.46744 2.599630 0.9280960 1.541200 -0.483154
## 5 0.563423 4.325520 1.73424 -4.996600 -7.8645100 -0.139164 2.314240
## 6 6.806240 4.354110 2.02088 2.259160 2.8227200 -7.372750 0.467339
## PC149 PC150 PC151 PC152 PC153 PC154 PC155
## 1 -0.751374 -0.314859 4.9189600 2.832320 2.1644200 -0.48708 5.633100
## 2 -3.457890 1.122780 4.4779900 3.029150 0.5865970 2.21855 5.408460
## 3 1.304000 -0.849583 -5.3868500 -3.694100 -1.4307600 0.99073 -2.478430
## 4 -0.648384 1.500540 -0.0454821 -0.899994 -0.9112840 3.99710 2.946110
## 5 4.085970 -1.058240 2.1960000 1.563200 -0.8587100 -2.03031 -0.114413
## 6 4.018140 -3.663050 -6.9023600 1.565750 -0.0364479 -1.12643 -3.045680
## PC156 PC157 PC158 PC159 PC160 PC161 PC162
## 1 -3.40933 4.245140 -1.608310 -0.170535 2.250260000 3.06643 -0.852598
## 2 2.57965 1.008830 -1.634390 3.641350 -0.684657000 1.74680 -1.512600
## 3 -4.95370 4.642520 3.691120 -1.763100 -4.002720000 4.10108 1.779000
## 4 2.82666 -4.000820 9.446800 0.366259 -0.306979000 -1.30854 -1.802620
## 5 -5.92844 3.509700 0.381809 2.169450 0.179172000 -4.47060 2.325670
## 6 3.92859 0.675016 -2.718180 1.907780 0.000285673 -8.03710 0.267860
## PC163 PC164 PC165 PC166 PC167 PC168 PC169 PC170
## 1 0.582339 -2.580390 2.534640 -1.589210 -1.59853 -1.455500 -4.33631 -3.95844
## 2 -0.592519 -3.621410 2.543370 -1.163350 1.89278 -1.634220 -5.58807 1.23806
## 3 1.663760 1.286180 -2.124800 -0.953216 -2.25028 3.720880 -4.73797 -0.96331
## 4 0.554564 -0.670156 0.685410 4.931360 -8.06053 -0.302732 7.43252 -3.31588
## 5 1.366430 -0.520266 -0.703254 2.255250 -5.59305 -5.713120 2.88964 -2.07597
## 6 -3.714100 9.020530 5.422160 4.142060 5.22376 -0.119157 4.82586 3.03351
## PC171 PC172 PC173 PC174 PC175 PC176 PC177
## 1 0.3138550 -2.836640 0.581467 -3.716190 -1.784550 1.922010 0.736131
## 2 2.8229900 1.280020 -2.240140 -1.526570 -1.140210 0.987885 0.272200
## 3 -0.0935415 -0.850212 2.103960 7.079490 -2.267310 -1.072840 -4.739330
## 4 2.0464400 -3.026580 2.425940 -0.111941 0.515358 -0.360149 4.501240
## 5 2.5353400 -1.658530 -0.194528 -4.949810 4.951410 -3.588460 0.845839
## 6 5.3982000 7.035450 -0.577793 1.208870 1.451360 4.278480 1.147550
## PC178 PC179 PC180 PC181 PC182 PC183 PC184
## 1 -0.245713 -7.739400 -2.80283 5.160200 -5.933070 4.642320 -2.3001200
## 2 0.489924 -1.649790 -3.27892 3.705430 0.475018 -0.281622 3.6696600
## 3 -2.571620 -3.262760 2.11674 0.566958 5.369940 0.913404 -10.4730000
## 4 -2.499130 5.848700 -1.91250 0.993972 -3.598870 3.904920 -1.2258800
## 5 -1.163900 -0.547669 -2.98600 4.095320 2.077450 2.955180 -0.0954974
## 6 -2.227390 9.930780 -3.43060 5.058650 3.444780 11.927500 -0.9931580
## PC185 PC186 PC187 PC188 PC189 PC190 PC191
## 1 -1.8608100 -3.57918 -3.912450 -2.7491800 -2.5978300 -1.807430 -1.8084200
## 2 -1.8806100 -7.12260 0.209279 0.0102187 -0.0604702 4.360960 5.1693100
## 3 -7.1409200 1.70281 -1.111460 -7.1552800 -8.6599500 -0.900483 -1.0899900
## 4 6.2324500 4.95887 -5.534910 -2.1408900 6.2462800 -2.671090 -6.8505500
## 5 0.0824617 2.49476 -1.652400 -3.6944000 -1.7281500 -1.053770 0.5330990
## 6 -4.4923500 5.50138 10.743400 -1.3303000 1.5984800 0.940630 0.0717802
## PC192 PC193 PC194 PC195 PC196 PC197 PC198
## 1 4.392910 -0.704383 0.127557 2.069700 0.267326 2.343840 -6.774000
## 2 4.154560 2.998550 0.741238 2.082670 2.141780 -0.735199 -2.112400
## 3 -3.583830 -0.872622 1.375060 -0.841222 -7.582140 -3.181530 -3.944950
## 4 -2.731750 4.570590 -6.040680 3.671770 -6.331720 -2.819110 9.198310
## 5 0.814798 4.779060 -4.684740 1.950760 2.227170 -0.463965 -0.632437
## 6 -6.477250 1.733180 9.695080 1.744510 -1.948060 -4.347590 -4.955840
## PC199 PC200 PC201 PC202 PC203 PC204 PC205
## 1 2.29855 -0.0840596 -0.290231 5.129110 -1.081720 -2.044860 -0.0636947
## 2 -1.01843 1.9971400 -1.360590 4.351050 0.039855 -1.017150 0.0704090
## 3 -4.91483 -8.5438900 2.165530 -3.273990 -3.107690 -1.767540 0.7227850
## 4 6.95400 2.8321700 0.960941 -0.693739 2.248740 -0.873033 6.2040300
## 5 2.81415 -1.6188800 3.698710 3.380350 -9.481560 -5.930260 -4.7194000
## 6 -4.46817 3.0213600 -4.903340 0.566249 9.049290 -8.506660 4.4522700
## PC206 PC207 PC208 PC209 PC210 PC211 PC212
## 1 1.5565900 4.89563 9.70141 3.520910 5.68518 -0.588393 1.056990
## 2 0.8222620 4.83075 2.18860 2.087710 -2.66309 -2.121430 1.516630
## 3 0.0512091 10.63600 11.86430 14.413100 -7.77049 4.691010 0.630596
## 4 3.4318200 -2.31269 -2.76345 -12.579300 11.45120 2.536890 10.807200
## 5 -0.9745060 8.45072 7.45257 -0.122306 15.12800 -0.265452 1.483020
## 6 9.0050700 -6.74781 -10.43120 1.213550 -12.14130 1.414730 -7.572490
## PC213 PC214 PC215 PC216 PC217 PC218 PC219 PC220
## 1 -1.266960 -1.60985 -0.455257 1.231620 1.37431 -4.960800 -4.40612 6.5877900
## 2 -0.481193 -3.71090 1.183000 -1.255400 2.45358 -2.561620 -1.60094 3.2417100
## 3 3.097420 -1.23464 4.302810 4.391370 7.53760 4.076680 -10.18920 5.0080000
## 4 -7.962850 -3.56571 2.817200 0.285720 4.46929 -4.205260 1.14108 -5.4300800
## 5 -7.528000 -3.75026 8.194150 1.091220 2.87153 -6.945880 1.55729 -3.4248800
## 6 0.178464 6.24746 -8.234460 -0.268105 2.09840 0.779583 -17.71250 -0.0259676
## PC221 PC222 PC223 PC224 PC225 PC226 PC227
## 1 -2.49333 1.295260 1.77904 -1.765170 1.51092 -6.89570 1.222100
## 2 -4.23876 4.800870 -1.94656 -2.462170 6.82174 -7.55609 0.722666
## 3 5.08803 7.371570 8.18448 0.267683 3.48268 -14.00320 0.621006
## 4 4.63688 -0.344474 -3.59394 7.851560 -4.77102 5.96996 -7.958300
## 5 -3.30376 10.818000 -5.81665 -0.492456 5.12095 -2.48280 -0.717461
## 6 -8.96617 -14.448400 1.28541 -1.475940 -5.50763 -9.50137 -5.830390
## PC228 PC229 PC230 PC231 PC232 PC233 PC234
## 1 -0.291177 5.38652 5.04529 3.02889 6.859150 1.555080 1.29869
## 2 -2.053150 -5.98924 3.81262 4.17189 0.124972 3.060280 -1.89391
## 3 -15.958000 -7.04024 2.50946 5.35528 0.410173 1.464870 2.39490
## 4 0.578263 5.83464 -13.33220 -10.07140 -3.934080 3.461750 -1.71238
## 5 -4.859340 7.17837 -17.11510 2.30003 8.734620 -5.527440 2.22191
## 6 5.254240 -1.82583 4.63786 12.23260 -3.380450 -0.478726 4.62158
## PC235 PC236 PC237 PC238 PC239 PC240 PC241 PC242
## 1 1.268790 4.52818 -7.90247 1.263610 4.09049 6.95967 -5.75503 -3.350460
## 2 -2.088440 -1.19242 5.23539 -3.943150 2.10397 4.34058 -2.17436 -3.132580
## 3 -12.433200 4.77106 8.64175 -15.126500 -2.63310 3.34238 3.77514 8.087270
## 4 2.793900 -7.70442 14.41080 -3.873750 -1.45488 -2.44353 10.76130 3.397560
## 5 -0.870791 -1.30531 -8.18779 -0.964891 -2.22363 10.75230 1.02944 0.560609
## 6 -8.529220 -6.23071 -4.26087 2.978340 -5.84498 -3.58001 -7.57907 3.112510
## PC243 PC244 PC245 PC246 PC247 PC248 PC249 PC250
## 1 -1.887980 3.048360 8.25138 2.96888 -2.74788 -7.25497 0.606981 2.748020
## 2 -0.467864 0.751218 -2.37102 -2.16277 -3.27506 4.45175 -2.053020 -1.088070
## 3 -5.055780 -16.320700 -6.31010 7.57656 4.43318 9.95462 -1.544290 -1.869050
## 4 4.911020 2.182850 8.67330 -1.86469 -11.33880 3.70551 1.268100 0.207395
## 5 -4.010240 4.846260 8.99997 -9.64692 -4.28301 -6.92391 -5.193560 5.790190
## 6 0.317515 0.249875 -8.37567 7.74226 1.02684 -2.74028 4.764380 -1.612520
## PC251 PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 4.86447 -2.52755 2.38467 0.0448477 -5.31077 8.40696 8.242650 5.59847
## 2 -4.35830 5.40465 1.91873 -2.1237500 1.24516 4.74902 -0.557711 -1.64364
## 3 3.39060 -3.05126 -6.91127 -9.8591800 0.62905 -4.09576 -3.203680 3.44532
## 4 -1.63647 -5.42257 1.56657 -5.1332400 -8.49202 -9.78501 -7.576970 -1.69326
## 5 1.82744 1.09874 3.63232 0.8565510 -5.45035 8.13579 -5.024560 3.09718
## 6 3.90397 4.10101 -2.05782 -2.1190700 5.60654 -1.82609 -3.433010 -3.56565
## PC259 PC260 PC261 PC262 PC263 PC264 PC265
## 1 9.3155000 0.719761 0.580734 -7.64357000 0.696658 4.39360 8.06374
## 2 3.4819200 4.315870 1.015440 -4.47027000 5.465670 4.00709 -6.48726
## 3 2.1263500 -0.517176 4.983100 5.55968000 0.179064 -6.60045 -2.56074
## 4 -5.2229300 -11.558400 2.654590 0.00190635 -1.453690 -5.30192 -4.26263
## 5 -6.4178500 5.448530 4.900950 -0.67934400 3.008140 -1.17734 -5.74475
## 6 0.0127738 4.882430 -1.211800 4.04374000 0.210157 1.39389 2.00698
## PC266 PC267 PC268 PC269 PC270 PC271 PC272
## 1 0.180735 2.117720 -4.77674 -4.413830 9.333690 14.076300 -4.07322
## 2 -7.499210 -3.653030 2.66255 -0.555569 -4.079380 -2.101810 6.11986
## 3 3.790040 0.247629 3.81500 0.609312 -2.652120 -0.727738 -1.39410
## 4 1.938170 -0.620761 8.61369 3.525960 -1.878210 -7.486710 2.22378
## 5 3.740610 -2.155160 -4.02776 2.201080 -2.951990 -3.904070 1.04965
## 6 -0.316796 -3.881040 -2.55355 -2.415430 -0.699635 1.750140 -3.13271
## PC273 PC274 PC275 PC276 PC277 PC278 PC279
## 1 -10.811000 0.208267 4.651730 1.16779 -4.812110 -3.322090 3.026150
## 2 5.981040 0.860304 -5.686120 5.86928 6.004530 -1.379920 -3.026130
## 3 2.620050 -5.757590 -1.667580 -2.79863 -0.380622 -4.231010 4.692490
## 4 3.807900 -5.598330 1.101140 3.98188 -1.783290 -1.552710 2.946290
## 5 0.375166 4.860810 -0.651275 1.82738 8.155100 0.560862 -6.258940
## 6 2.427860 4.076330 0.558768 -1.12187 -1.858940 4.047870 0.695363
## PC280 PC281 PC282 PC283 PC284 PC285 PC286 PC287
## 1 4.499140 -8.17780 0.644040 3.70380 -3.647450 -7.788210 0.251713 -4.16450
## 2 4.382140 -4.68117 0.529465 1.19826 2.134550 -5.084610 -4.957800 5.29651
## 3 -0.228019 4.32578 -1.448660 -7.78725 7.675220 5.594250 -1.586220 1.07977
## 4 0.499336 4.76906 -4.674750 2.14105 1.589770 -0.186059 1.760220 -1.23835
## 5 -5.299180 1.91256 9.464840 5.74187 -6.064580 5.245830 -5.734360 2.64987
## 6 0.277668 -1.00595 3.460120 -4.49424 0.237973 3.234210 -0.301798 5.27515
## PC288 PC289 PC290 PC291 PC292 PC293 PC294
## 1 -9.996610 13.12480 2.871160 -4.881800 15.220000 9.717160 1.721690
## 2 5.358210 -1.06213 0.404066 -3.354730 5.271610 1.987590 15.290100
## 3 0.973881 -5.44692 -4.967540 1.100860 -6.512470 -6.068240 -6.820920
## 4 -1.243610 -3.83314 1.145480 -0.172442 -2.360910 0.271403 -0.357951
## 5 7.786440 -6.88359 5.737540 -0.726949 -6.030380 -7.258960 2.350180
## 6 -0.371498 3.70075 -0.705509 1.010940 -0.938047 3.868570 3.402900
## PC295 PC296 PC297 PC298 PC299 PC300 PC301
## 1 -3.4110900 1.686080 6.62375 -10.3170000 2.115910 0.321069 1.199770
## 2 -9.4855200 6.153120 17.61420 -7.2190300 7.198400 -6.962230 -5.653820
## 3 3.3814800 -4.143960 -5.74892 4.9096400 -0.910531 -0.102854 -0.283924
## 4 -3.0965900 0.839258 -4.59375 -0.0365626 -0.220522 -2.668270 -5.738610
## 5 -4.4456700 -0.248431 -11.56480 4.4448900 1.259200 0.233057 3.147440
## 6 -0.0810154 -1.332060 -1.32580 1.4609400 1.912250 1.705790 2.295310
## PC302 PC303 PC304 PC305 PC306 PC307 PC308
## 1 1.574640 -3.410510 -2.8918700 -3.23672 1.482110 -1.8217600 1.745490
## 2 0.802838 -8.660020 -1.3383400 -8.27628 3.236250 7.9197700 -3.593290
## 3 -1.958730 4.446000 0.7113250 5.66868 -0.847999 -4.1450500 0.267413
## 4 -0.458483 -0.995096 0.0814797 -5.66699 -2.117740 1.3477800 0.150716
## 5 -5.632880 -1.385670 9.9214900 -1.62767 -4.083950 6.1395500 -1.262320
## 6 -0.970871 0.859766 0.2588410 3.55760 1.765990 -0.0887573 -2.060870
## PC309 PC310 PC311 PC312 PC313 PC314 PC315
## 1 -3.918990 -0.167691 -1.184250 1.242380 -0.466074 0.353532 -2.221200
## 2 2.215080 2.951480 -1.747090 -1.625510 -1.582370 -2.256560 2.379820
## 3 -0.939395 -2.352220 -1.784810 4.128300 -2.899250 2.590090 -1.071720
## 4 3.228050 -0.940345 0.394989 -0.637514 0.110893 1.006150 0.615197
## 5 1.743200 5.374170 3.288830 -7.110870 0.716492 -0.481899 1.932740
## 6 0.246399 -1.196600 1.812030 1.838890 0.300388 -0.774491 0.605467
## PC316 PC317 PC318 PC319 PC320 PC321 PC322
## 1 0.352821 -2.220830 -2.11519 2.158240 0.247180 -0.4446400 -0.835695
## 2 -0.496456 -2.474130 -4.76329 0.395585 -0.520601 -1.4199800 -3.187130
## 3 0.979855 0.753731 2.23401 0.406704 -0.827127 -0.4565480 0.968956
## 4 -0.418606 -0.105097 -2.62185 0.286816 1.054780 -0.0905316 -0.621044
## 5 0.302910 -2.030700 1.58847 -3.183170 3.812870 3.7652200 -2.353640
## 6 -0.886637 0.871873 -2.42999 2.415860 -0.816579 -0.9468930 2.103610
## PC323 PC324 PC325 PC326 PC327 PC328 PC329
## 1 0.516407 0.357848 -1.338830 2.550150 1.7769500 0.640938 0.7280670
## 2 2.077730 -2.123860 1.529100 0.947236 -1.4292900 -1.574630 -0.2259680
## 3 -0.400698 1.644650 -0.458929 -0.531544 -1.1136200 -1.376580 -0.5154730
## 4 1.133430 1.358530 -0.616837 0.314308 0.0895645 -1.338060 1.1146800
## 5 3.226560 -3.635110 2.314460 -2.032180 -0.6470410 0.445676 0.0271391
## 6 -4.040430 -1.187590 -2.444880 0.737983 -2.7459600 -1.951170 -1.7823600
## PC330 PC331 PC332 PC333 PC334 PC335 PC336
## 1 -1.5942800 0.514835 -0.2979850 0.770455 1.289120 -0.0675534 0.7056970
## 2 -1.0438600 0.757701 -1.5047800 3.134780 0.646038 -0.2487530 -0.3816210
## 3 0.0645953 -0.122472 -0.2345380 -0.565257 -0.396688 -0.3370070 -0.3057790
## 4 -0.6303430 -0.454786 -0.0135467 0.381199 -0.143390 0.1891330 -0.0823058
## 5 -0.0344383 -0.600860 0.1210370 -0.596788 -0.330062 0.1136310 0.5483740
## 6 1.0374000 0.138874 0.1367710 0.643809 -0.335264 -0.4544160 -0.2970510
## PC337 PC338 Individual Pop_City Country Latitude Longitude
## 1 -0.1890310 1.22504e-06 1210 Alushta Ukraine 44.68289 34.40368
## 2 0.2466140 1.22504e-06 1209 Alushta Ukraine 44.68289 34.40368
## 3 0.1358210 1.22504e-06 1204 Alushta Ukraine 44.68289 34.40368
## 4 0.2101320 1.22504e-06 1211 Alushta Ukraine 44.68289 34.40368
## 5 0.0485408 1.22504e-06 1212 Alushta Ukraine 44.68289 34.40368
## 6 0.2411700 1.22504e-06 1213 Alushta Ukraine 44.68289 34.40368
## Continent Year Region Subregion order order2 orderold
## 1 Europe 2021 Eastern Europe East Europe 43 35 35
## 2 Europe 2021 Eastern Europe East Europe 43 35 35
## 3 Europe 2021 Eastern Europe East Europe 43 35 35
## 4 Europe 2021 Eastern Europe East Europe 43 35 35
## 5 Europe 2021 Eastern Europe East Europe 43 35 35
## 6 Europe 2021 Eastern Europe East Europe 43 35 35
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro_2.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro_2.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_far_east_euro_2.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent
## 1 Sevastopol, Crimea Sevastopol, Ukraine 44.54125 33.51400 Europe
## 2 Alushta Alushta, Ukraine 44.68289 34.40368 Europe
## 3 Kerch, Crimea Kerch, Ukraine 45.35246 36.47015 Europe
## 4 Krasnodar Krasnodar, Russia 44.95504 39.02782 Europe
## 5 Sochi Sochi, Russia 43.60042 39.74533 Europe
## 6 Tikhoretsk Tikhoretsk, Russia 45.85460 40.12560 Europe
## Abbreviation Year Region Subregion order order2 orderold
## 1 SEV 2021 Eastern Europe East Europe 42 34 34
## 2 ALU 2021 Eastern Europe East Europe 43 35 35
## 3 KER 2021 Eastern Europe East Europe 44 36 36
## 4 KRA 2017 Eastern Europe East Europe 45 37 37
## 5 SOC 2021 Eastern Europe East Europe 46 38 38
## 6 TIK 2021 Eastern Europe East Europe 47 39 39
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 37 Levels: ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC ... YUN
## [1] 37
Check the regions
## [1] "Eastern Europe" "East Asia" "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6
## 1 ALU 20.4856 -0.305611 -2.550140 -7.28735 -24.20470 6.38975
## 2 ALU 25.3823 4.337260 -0.388604 -5.43395 -20.36060 8.95141
## 3 ALU 20.0237 2.626390 -0.786929 -10.38780 -29.93820 8.93216
## 4 ALU 25.8712 -3.342340 -1.110070 -2.41729 2.61012 -3.53675
## 5 ALU 30.0675 -11.041700 0.724509 -4.64010 -5.47538 9.77449
## 6 ALU 27.3772 -2.688700 0.959256 -1.42843 -1.32056 -4.38881
## PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14
## 1 -15.04960 6.67978 0.151167 38.2655 -6.15882 -8.821650 -24.86590 -3.87666
## 2 -12.67250 4.17658 -0.154637 31.6130 -3.83496 -1.228910 -8.24218 3.89523
## 3 -19.74160 8.25617 1.521170 44.7494 -1.77796 -3.928850 -15.47780 4.22217
## 4 6.09960 4.60456 0.107691 31.9499 6.25326 0.350372 5.77296 12.58190
## 5 -7.78187 2.21053 0.984531 24.2537 9.42648 -2.278880 1.63767 -27.95230
## 6 2.18008 1.82266 0.788612 27.3708 -2.65027 -0.994772 -4.55721 -17.14900
## PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 -6.17760 14.824700 -9.01089 7.14638 13.722300 8.52422 7.00553 -3.77419
## 2 -4.40492 -0.229220 -9.98679 -5.77431 -4.806080 17.98710 19.70860 -9.18956
## 3 -6.60865 5.091250 -17.67700 2.63932 0.991916 10.78830 11.84630 -11.81950
## 4 -5.58323 -3.184320 -18.61840 1.67256 -4.746580 14.92690 17.16900 -7.24129
## 5 -2.96934 -8.900110 21.58010 -29.60820 -15.574500 1.08231 -5.34274 8.00136
## 6 -1.06223 -0.968945 13.54250 -20.00430 -14.981500 5.18956 1.51858 10.53780
## PC23 PC24 PC25 PC26 PC27 PC28 PC29
## 1 -1.45222 -3.332710 -4.2012100 -9.21728 4.76234 -0.891124 5.2582300
## 2 -11.60540 15.355200 -4.0634100 -1.13517 1.33338 14.433700 -3.7651200
## 3 -9.34223 8.623670 -3.7577200 -6.51587 5.63725 6.830240 0.0136676
## 4 -6.96422 9.800260 -10.1645000 -13.08660 12.53640 -2.196250 -8.5672800
## 5 7.71842 -9.136000 0.0228503 -9.11044 10.12690 -2.903270 1.2629200
## 6 3.21686 0.968363 -5.4992600 -2.00759 8.71201 -3.956690 -10.9915000
## PC30 PC31 PC32 PC33 PC34 PC35 PC36
## 1 -11.54270 0.105329 -5.48342 -6.168920 -2.445510 2.38026 -18.168900
## 2 -5.08792 -3.830590 -3.33753 4.723430 -2.689220 4.57595 6.566870
## 3 -10.27700 1.422650 -7.00011 -6.931120 -5.608190 11.83910 -6.569350
## 4 -4.63713 13.878400 -7.89427 -0.547714 -4.742410 7.84252 -6.930040
## 5 -17.04900 -11.385000 1.81386 2.792030 3.853960 -13.36160 3.102920
## 6 -1.97113 11.571500 3.23825 1.899260 -0.426233 -4.60796 0.468608
## PC37 PC38 PC39 PC40 PC41 PC42 PC43
## 1 0.638639 5.96370 1.638110 -2.702090 26.01240 2.872620 -6.5748000
## 2 2.467160 -0.13457 2.271750 -0.695783 -25.92740 3.957250 9.0044100
## 3 2.511160 -1.24808 0.509379 6.589810 -1.63697 4.799590 -0.0233594
## 4 2.121700 -1.87570 1.823560 2.841060 2.42338 0.418901 -11.3373000
## 5 2.344010 -3.17460 7.938730 0.777851 -1.85059 -5.698850 14.5095000
## 6 5.034230 4.93773 10.499000 -15.493800 3.24924 5.950570 14.8428000
## PC44 PC45 PC46 PC47 PC48 PC49 PC50 PC51
## 1 16.89850 1.788520 5.62376 8.75830 7.335620 3.93678 -7.88436 -0.00527219
## 2 -13.97630 0.418266 -4.15255 -5.67872 -9.702130 5.36903 8.58153 6.94867000
## 3 2.98640 -1.076080 1.37242 -1.21517 -0.602785 2.66775 -3.25599 8.65494000
## 4 5.15674 3.924090 4.79590 4.03105 10.211600 1.34531 -9.18752 2.72248000
## 5 -1.09257 -2.114320 -2.75617 3.44852 0.473827 0.79640 -6.17943 3.01706000
## 6 -1.87532 -11.292000 -3.75054 6.02935 6.197380 3.29759 -3.60343 -0.55713100
## PC52 PC53 PC54 PC55 PC56 PC57 PC58
## 1 2.340900 12.99150 8.848370 -1.60283 -20.37070 -0.376401 11.458700
## 2 -4.383410 -9.85162 -11.181800 3.11030 26.42630 -1.008880 -18.740200
## 3 -6.471730 -0.49460 -0.560385 4.99467 -1.27009 1.606140 0.764857
## 4 -1.318270 -3.21074 0.506548 1.69150 -9.63551 -1.613160 -6.314400
## 5 0.859159 5.97654 2.418050 -2.52355 -5.83637 3.143980 7.944640
## 6 4.578160 1.21431 6.930270 -10.54760 3.27519 8.432160 9.756380
## PC59 PC60 PC61 PC62 PC63 PC64 PC65
## 1 12.856000 -3.76451 3.046620 3.70025 -1.03461 1.963410 -4.801280
## 2 -4.022410 11.09840 -2.703890 -2.78526 6.80457 -1.793960 1.971330
## 3 -6.825280 1.63018 3.468500 -2.68255 -3.66434 7.675350 -1.717530
## 4 2.388530 -8.23049 1.187890 13.60900 -8.75254 -7.880010 8.460010
## 5 7.744390 3.05013 1.542050 -6.87117 9.60592 -1.401980 -7.486110
## 6 -0.982531 -1.11583 0.763478 10.81760 16.40710 -0.705895 -0.986492
## PC66 PC67 PC68 PC69 PC70 PC71 PC72
## 1 1.1013800 6.9687800 0.677785 -14.196900 1.932510 -2.730120 0.852579
## 2 0.0759335 -13.2050000 1.579390 17.570800 0.696840 -5.388440 1.549200
## 3 9.6270600 -0.2945390 -3.152270 -0.185804 -1.788710 2.772100 -1.140440
## 4 -19.0307000 0.0222812 13.416700 -9.458960 3.892460 -5.184720 -5.684060
## 5 -6.0241000 4.3416200 2.450220 0.687401 0.928587 -0.543575 -1.444860
## 6 -13.5157000 2.9042000 19.716400 -6.014460 -3.473160 -3.559860 -1.434370
## PC73 PC74 PC75 PC76 PC77 PC78 PC79 PC80
## 1 -0.0272235 0.442396 -0.734162 1.60670 8.34790 -1.57096 -2.325790 -2.66391
## 2 3.7382100 3.371150 1.717450 2.09539 -12.74000 -1.72072 4.329370 -2.44019
## 3 -3.2626400 -3.607260 -3.840580 -2.75547 6.71369 1.34046 0.637695 2.15170
## 4 0.7048390 -3.747130 1.718730 4.18192 -3.58733 4.14767 -4.507960 -5.19651
## 5 -5.9070400 -4.062740 -4.659250 -1.10407 3.31766 2.18593 -6.823190 -4.00661
## 6 6.0514700 -4.166360 -4.344890 -7.77497 4.97026 -1.38811 -3.049480 1.18698
## PC81 PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 1 -6.443170 1.83956 -4.31690 3.162300 3.88469 2.71503 1.53758 3.58843
## 2 -0.199465 3.38909 -2.99189 3.244740 -1.58022 3.20976 1.88266 6.84259
## 3 -1.699530 -3.79007 4.17300 7.029760 1.97388 -3.85027 4.20135 -6.80635
## 4 -18.767000 5.48438 -9.84728 5.582990 2.23714 4.16893 -10.36050 3.19901
## 5 9.240860 -1.70624 3.38226 2.525090 -3.98731 2.47006 4.15028 -8.34635
## 6 -6.014610 -8.22647 -13.41560 -0.858546 -3.22604 8.85529 -10.47330 -4.72417
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## 1 2.9237700 -1.174850 -1.771270 2.770570 0.475811 2.639810 0.441089
## 2 -1.8059400 4.373540 -12.713400 0.889759 -1.552400 -0.109804 4.667540
## 3 -10.5783000 -6.958860 10.898300 -1.400270 0.583719 -0.996475 -1.975080
## 4 2.2166400 -0.260060 -6.463720 7.555240 -4.397560 0.371583 -2.671060
## 5 0.2608440 -5.346590 0.409559 1.481070 -2.083320 4.788510 3.257690
## 6 0.0814059 -0.702711 -8.225600 2.683760 -8.159570 2.908500 1.401030
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 1.871160 -5.345640 1.123030 3.8324100 4.429460 3.851840 -0.684432
## 2 -0.947024 -3.227110 -0.485948 1.7780700 -0.494879 2.156390 3.148300
## 3 0.877382 2.141940 -3.737950 1.3161900 -3.770210 1.886970 -1.763700
## 4 1.246670 -0.479822 0.900249 -7.1356500 1.164820 1.714810 1.542460
## 5 -4.474370 3.739800 -0.491935 0.0339354 1.290120 0.387799 -0.703330
## 6 -1.487730 8.175420 6.974200 -6.9411800 1.599830 -12.971100 5.138090
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 2.25127 1.52242 1.906900 0.3036580 2.60092 -0.43695 5.143180 0.670442
## 2 1.61509 2.50924 -0.372587 1.5369000 -5.25799 1.23955 2.456150 4.775800
## 3 -5.46988 -1.75623 1.980140 0.9970790 1.69452 -8.97879 0.148475 -3.022270
## 4 -3.20834 -2.54959 0.729329 0.1442540 1.59751 4.16221 -3.662630 3.966310
## 5 -1.75826 -3.16704 3.781260 -0.2276980 2.71692 -2.47770 3.644330 3.318670
## 6 -0.15957 -1.73146 4.174510 -0.0224256 2.94150 5.84921 1.235730 -1.103880
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 0.233544 1.491500 -2.291850 2.75724 4.884290 -5.246880 -0.804267
## 2 -2.612870 1.070640 3.629040 -1.37871 -4.767060 -0.517355 -5.131480
## 3 1.307540 0.255413 -0.929420 3.97817 1.340020 3.841250 2.142500
## 4 -5.926810 -4.381290 0.525513 -4.22072 8.663280 -0.735895 -11.334500
## 5 6.378110 1.025500 4.854340 3.27895 -3.870160 -5.524730 8.700350
## 6 -1.616950 -5.279780 -2.855310 -11.38320 -0.243223 -3.058180 -7.078780
## PC118 PC119 PC120 PC121 PC122 PC123 PC124 PC125
## 1 -2.10701 3.333620 -2.215670 1.317990 -1.266080 0.190247 -3.89487 -2.80235
## 2 -4.94447 5.394510 -0.399604 0.336776 -0.395207 3.196520 -4.65527 2.98828
## 3 11.90080 -3.439220 3.095730 -0.587585 2.156610 1.273720 -3.38204 -2.57005
## 4 -5.42009 3.225020 -5.246600 3.407710 -5.493770 -5.074700 3.36216 3.98967
## 5 3.73444 -5.657550 1.020350 1.996580 1.535220 1.972160 -2.88932 -8.43176
## 6 -2.36509 0.662008 -8.206090 4.289750 2.361380 -5.375370 3.63467 5.10431
## PC126 PC127 PC128 PC129 PC130 PC131 PC132 PC133
## 1 3.471030 3.016630 -3.925310 -2.14323 -2.687760 -0.433259 3.58807 1.525600
## 2 -4.326420 3.231160 -0.718613 -1.38307 -0.170312 -0.374122 -4.82892 2.103600
## 3 -3.125980 -0.896061 -1.163370 1.41482 0.493408 1.832260 3.01200 0.647162
## 4 0.802205 -4.291240 -5.576690 0.97485 1.256250 -1.065740 7.41296 0.175257
## 5 -4.921190 0.742270 -5.490360 -1.96823 0.305370 3.665320 -5.13845 -2.557740
## 6 -6.691420 3.812800 -0.800976 5.76665 -2.657830 2.180710 1.69836 -8.661120
## PC134 PC135 PC136 PC137 PC138 PC139 PC140 PC141
## 1 4.204800 3.50766 -3.622220 -3.061660 4.27550 -1.23560 -5.24803 -0.523196
## 2 2.249160 7.33844 -0.255614 -5.768620 5.60840 -1.66117 -2.77480 0.705456
## 3 -0.632887 -2.77590 -2.518710 0.322666 -5.57880 5.30426 3.46215 -3.504200
## 4 -8.758330 1.69746 4.803670 0.639076 6.57224 -2.78041 -9.07109 1.242930
## 5 -3.477590 -3.41453 -6.135030 -3.890220 1.14685 3.50326 3.22089 -4.223740
## 6 0.188458 6.80750 4.682540 -2.105050 -4.82354 1.69971 3.74137 -2.142900
## PC142 PC143 PC144 PC145 PC146 PC147 PC148
## 1 -0.173200 -0.775339 4.46472 -1.424750 -2.2181000 2.490170 1.419960
## 2 5.600500 -6.012330 5.26934 -3.267700 0.0852875 1.049250 -1.768620
## 3 0.320618 1.420100 -7.04487 0.232887 3.3146700 -3.616300 1.685780
## 4 -1.607800 0.241212 -4.46744 2.599630 0.9280960 1.541200 -0.483154
## 5 0.563423 4.325520 1.73424 -4.996600 -7.8645100 -0.139164 2.314240
## 6 6.806240 4.354110 2.02088 2.259160 2.8227200 -7.372750 0.467339
## PC149 PC150 PC151 PC152 PC153 PC154 PC155
## 1 -0.751374 -0.314859 4.9189600 2.832320 2.1644200 -0.48708 5.633100
## 2 -3.457890 1.122780 4.4779900 3.029150 0.5865970 2.21855 5.408460
## 3 1.304000 -0.849583 -5.3868500 -3.694100 -1.4307600 0.99073 -2.478430
## 4 -0.648384 1.500540 -0.0454821 -0.899994 -0.9112840 3.99710 2.946110
## 5 4.085970 -1.058240 2.1960000 1.563200 -0.8587100 -2.03031 -0.114413
## 6 4.018140 -3.663050 -6.9023600 1.565750 -0.0364479 -1.12643 -3.045680
## PC156 PC157 PC158 PC159 PC160 PC161 PC162
## 1 -3.40933 4.245140 -1.608310 -0.170535 2.250260000 3.06643 -0.852598
## 2 2.57965 1.008830 -1.634390 3.641350 -0.684657000 1.74680 -1.512600
## 3 -4.95370 4.642520 3.691120 -1.763100 -4.002720000 4.10108 1.779000
## 4 2.82666 -4.000820 9.446800 0.366259 -0.306979000 -1.30854 -1.802620
## 5 -5.92844 3.509700 0.381809 2.169450 0.179172000 -4.47060 2.325670
## 6 3.92859 0.675016 -2.718180 1.907780 0.000285673 -8.03710 0.267860
## PC163 PC164 PC165 PC166 PC167 PC168 PC169 PC170
## 1 0.582339 -2.580390 2.534640 -1.589210 -1.59853 -1.455500 -4.33631 -3.95844
## 2 -0.592519 -3.621410 2.543370 -1.163350 1.89278 -1.634220 -5.58807 1.23806
## 3 1.663760 1.286180 -2.124800 -0.953216 -2.25028 3.720880 -4.73797 -0.96331
## 4 0.554564 -0.670156 0.685410 4.931360 -8.06053 -0.302732 7.43252 -3.31588
## 5 1.366430 -0.520266 -0.703254 2.255250 -5.59305 -5.713120 2.88964 -2.07597
## 6 -3.714100 9.020530 5.422160 4.142060 5.22376 -0.119157 4.82586 3.03351
## PC171 PC172 PC173 PC174 PC175 PC176 PC177
## 1 0.3138550 -2.836640 0.581467 -3.716190 -1.784550 1.922010 0.736131
## 2 2.8229900 1.280020 -2.240140 -1.526570 -1.140210 0.987885 0.272200
## 3 -0.0935415 -0.850212 2.103960 7.079490 -2.267310 -1.072840 -4.739330
## 4 2.0464400 -3.026580 2.425940 -0.111941 0.515358 -0.360149 4.501240
## 5 2.5353400 -1.658530 -0.194528 -4.949810 4.951410 -3.588460 0.845839
## 6 5.3982000 7.035450 -0.577793 1.208870 1.451360 4.278480 1.147550
## PC178 PC179 PC180 PC181 PC182 PC183 PC184
## 1 -0.245713 -7.739400 -2.80283 5.160200 -5.933070 4.642320 -2.3001200
## 2 0.489924 -1.649790 -3.27892 3.705430 0.475018 -0.281622 3.6696600
## 3 -2.571620 -3.262760 2.11674 0.566958 5.369940 0.913404 -10.4730000
## 4 -2.499130 5.848700 -1.91250 0.993972 -3.598870 3.904920 -1.2258800
## 5 -1.163900 -0.547669 -2.98600 4.095320 2.077450 2.955180 -0.0954974
## 6 -2.227390 9.930780 -3.43060 5.058650 3.444780 11.927500 -0.9931580
## PC185 PC186 PC187 PC188 PC189 PC190 PC191
## 1 -1.8608100 -3.57918 -3.912450 -2.7491800 -2.5978300 -1.807430 -1.8084200
## 2 -1.8806100 -7.12260 0.209279 0.0102187 -0.0604702 4.360960 5.1693100
## 3 -7.1409200 1.70281 -1.111460 -7.1552800 -8.6599500 -0.900483 -1.0899900
## 4 6.2324500 4.95887 -5.534910 -2.1408900 6.2462800 -2.671090 -6.8505500
## 5 0.0824617 2.49476 -1.652400 -3.6944000 -1.7281500 -1.053770 0.5330990
## 6 -4.4923500 5.50138 10.743400 -1.3303000 1.5984800 0.940630 0.0717802
## PC192 PC193 PC194 PC195 PC196 PC197 PC198
## 1 4.392910 -0.704383 0.127557 2.069700 0.267326 2.343840 -6.774000
## 2 4.154560 2.998550 0.741238 2.082670 2.141780 -0.735199 -2.112400
## 3 -3.583830 -0.872622 1.375060 -0.841222 -7.582140 -3.181530 -3.944950
## 4 -2.731750 4.570590 -6.040680 3.671770 -6.331720 -2.819110 9.198310
## 5 0.814798 4.779060 -4.684740 1.950760 2.227170 -0.463965 -0.632437
## 6 -6.477250 1.733180 9.695080 1.744510 -1.948060 -4.347590 -4.955840
## PC199 PC200 PC201 PC202 PC203 PC204 PC205
## 1 2.29855 -0.0840596 -0.290231 5.129110 -1.081720 -2.044860 -0.0636947
## 2 -1.01843 1.9971400 -1.360590 4.351050 0.039855 -1.017150 0.0704090
## 3 -4.91483 -8.5438900 2.165530 -3.273990 -3.107690 -1.767540 0.7227850
## 4 6.95400 2.8321700 0.960941 -0.693739 2.248740 -0.873033 6.2040300
## 5 2.81415 -1.6188800 3.698710 3.380350 -9.481560 -5.930260 -4.7194000
## 6 -4.46817 3.0213600 -4.903340 0.566249 9.049290 -8.506660 4.4522700
## PC206 PC207 PC208 PC209 PC210 PC211 PC212
## 1 1.5565900 4.89563 9.70141 3.520910 5.68518 -0.588393 1.056990
## 2 0.8222620 4.83075 2.18860 2.087710 -2.66309 -2.121430 1.516630
## 3 0.0512091 10.63600 11.86430 14.413100 -7.77049 4.691010 0.630596
## 4 3.4318200 -2.31269 -2.76345 -12.579300 11.45120 2.536890 10.807200
## 5 -0.9745060 8.45072 7.45257 -0.122306 15.12800 -0.265452 1.483020
## 6 9.0050700 -6.74781 -10.43120 1.213550 -12.14130 1.414730 -7.572490
## PC213 PC214 PC215 PC216 PC217 PC218 PC219 PC220
## 1 -1.266960 -1.60985 -0.455257 1.231620 1.37431 -4.960800 -4.40612 6.5877900
## 2 -0.481193 -3.71090 1.183000 -1.255400 2.45358 -2.561620 -1.60094 3.2417100
## 3 3.097420 -1.23464 4.302810 4.391370 7.53760 4.076680 -10.18920 5.0080000
## 4 -7.962850 -3.56571 2.817200 0.285720 4.46929 -4.205260 1.14108 -5.4300800
## 5 -7.528000 -3.75026 8.194150 1.091220 2.87153 -6.945880 1.55729 -3.4248800
## 6 0.178464 6.24746 -8.234460 -0.268105 2.09840 0.779583 -17.71250 -0.0259676
## PC221 PC222 PC223 PC224 PC225 PC226 PC227
## 1 -2.49333 1.295260 1.77904 -1.765170 1.51092 -6.89570 1.222100
## 2 -4.23876 4.800870 -1.94656 -2.462170 6.82174 -7.55609 0.722666
## 3 5.08803 7.371570 8.18448 0.267683 3.48268 -14.00320 0.621006
## 4 4.63688 -0.344474 -3.59394 7.851560 -4.77102 5.96996 -7.958300
## 5 -3.30376 10.818000 -5.81665 -0.492456 5.12095 -2.48280 -0.717461
## 6 -8.96617 -14.448400 1.28541 -1.475940 -5.50763 -9.50137 -5.830390
## PC228 PC229 PC230 PC231 PC232 PC233 PC234
## 1 -0.291177 5.38652 5.04529 3.02889 6.859150 1.555080 1.29869
## 2 -2.053150 -5.98924 3.81262 4.17189 0.124972 3.060280 -1.89391
## 3 -15.958000 -7.04024 2.50946 5.35528 0.410173 1.464870 2.39490
## 4 0.578263 5.83464 -13.33220 -10.07140 -3.934080 3.461750 -1.71238
## 5 -4.859340 7.17837 -17.11510 2.30003 8.734620 -5.527440 2.22191
## 6 5.254240 -1.82583 4.63786 12.23260 -3.380450 -0.478726 4.62158
## PC235 PC236 PC237 PC238 PC239 PC240 PC241 PC242
## 1 1.268790 4.52818 -7.90247 1.263610 4.09049 6.95967 -5.75503 -3.350460
## 2 -2.088440 -1.19242 5.23539 -3.943150 2.10397 4.34058 -2.17436 -3.132580
## 3 -12.433200 4.77106 8.64175 -15.126500 -2.63310 3.34238 3.77514 8.087270
## 4 2.793900 -7.70442 14.41080 -3.873750 -1.45488 -2.44353 10.76130 3.397560
## 5 -0.870791 -1.30531 -8.18779 -0.964891 -2.22363 10.75230 1.02944 0.560609
## 6 -8.529220 -6.23071 -4.26087 2.978340 -5.84498 -3.58001 -7.57907 3.112510
## PC243 PC244 PC245 PC246 PC247 PC248 PC249 PC250
## 1 -1.887980 3.048360 8.25138 2.96888 -2.74788 -7.25497 0.606981 2.748020
## 2 -0.467864 0.751218 -2.37102 -2.16277 -3.27506 4.45175 -2.053020 -1.088070
## 3 -5.055780 -16.320700 -6.31010 7.57656 4.43318 9.95462 -1.544290 -1.869050
## 4 4.911020 2.182850 8.67330 -1.86469 -11.33880 3.70551 1.268100 0.207395
## 5 -4.010240 4.846260 8.99997 -9.64692 -4.28301 -6.92391 -5.193560 5.790190
## 6 0.317515 0.249875 -8.37567 7.74226 1.02684 -2.74028 4.764380 -1.612520
## PC251 PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 4.86447 -2.52755 2.38467 0.0448477 -5.31077 8.40696 8.242650 5.59847
## 2 -4.35830 5.40465 1.91873 -2.1237500 1.24516 4.74902 -0.557711 -1.64364
## 3 3.39060 -3.05126 -6.91127 -9.8591800 0.62905 -4.09576 -3.203680 3.44532
## 4 -1.63647 -5.42257 1.56657 -5.1332400 -8.49202 -9.78501 -7.576970 -1.69326
## 5 1.82744 1.09874 3.63232 0.8565510 -5.45035 8.13579 -5.024560 3.09718
## 6 3.90397 4.10101 -2.05782 -2.1190700 5.60654 -1.82609 -3.433010 -3.56565
## PC259 PC260 PC261 PC262 PC263 PC264 PC265
## 1 9.3155000 0.719761 0.580734 -7.64357000 0.696658 4.39360 8.06374
## 2 3.4819200 4.315870 1.015440 -4.47027000 5.465670 4.00709 -6.48726
## 3 2.1263500 -0.517176 4.983100 5.55968000 0.179064 -6.60045 -2.56074
## 4 -5.2229300 -11.558400 2.654590 0.00190635 -1.453690 -5.30192 -4.26263
## 5 -6.4178500 5.448530 4.900950 -0.67934400 3.008140 -1.17734 -5.74475
## 6 0.0127738 4.882430 -1.211800 4.04374000 0.210157 1.39389 2.00698
## PC266 PC267 PC268 PC269 PC270 PC271 PC272
## 1 0.180735 2.117720 -4.77674 -4.413830 9.333690 14.076300 -4.07322
## 2 -7.499210 -3.653030 2.66255 -0.555569 -4.079380 -2.101810 6.11986
## 3 3.790040 0.247629 3.81500 0.609312 -2.652120 -0.727738 -1.39410
## 4 1.938170 -0.620761 8.61369 3.525960 -1.878210 -7.486710 2.22378
## 5 3.740610 -2.155160 -4.02776 2.201080 -2.951990 -3.904070 1.04965
## 6 -0.316796 -3.881040 -2.55355 -2.415430 -0.699635 1.750140 -3.13271
## PC273 PC274 PC275 PC276 PC277 PC278 PC279
## 1 -10.811000 0.208267 4.651730 1.16779 -4.812110 -3.322090 3.026150
## 2 5.981040 0.860304 -5.686120 5.86928 6.004530 -1.379920 -3.026130
## 3 2.620050 -5.757590 -1.667580 -2.79863 -0.380622 -4.231010 4.692490
## 4 3.807900 -5.598330 1.101140 3.98188 -1.783290 -1.552710 2.946290
## 5 0.375166 4.860810 -0.651275 1.82738 8.155100 0.560862 -6.258940
## 6 2.427860 4.076330 0.558768 -1.12187 -1.858940 4.047870 0.695363
## PC280 PC281 PC282 PC283 PC284 PC285 PC286 PC287
## 1 4.499140 -8.17780 0.644040 3.70380 -3.647450 -7.788210 0.251713 -4.16450
## 2 4.382140 -4.68117 0.529465 1.19826 2.134550 -5.084610 -4.957800 5.29651
## 3 -0.228019 4.32578 -1.448660 -7.78725 7.675220 5.594250 -1.586220 1.07977
## 4 0.499336 4.76906 -4.674750 2.14105 1.589770 -0.186059 1.760220 -1.23835
## 5 -5.299180 1.91256 9.464840 5.74187 -6.064580 5.245830 -5.734360 2.64987
## 6 0.277668 -1.00595 3.460120 -4.49424 0.237973 3.234210 -0.301798 5.27515
## PC288 PC289 PC290 PC291 PC292 PC293 PC294
## 1 -9.996610 13.12480 2.871160 -4.881800 15.220000 9.717160 1.721690
## 2 5.358210 -1.06213 0.404066 -3.354730 5.271610 1.987590 15.290100
## 3 0.973881 -5.44692 -4.967540 1.100860 -6.512470 -6.068240 -6.820920
## 4 -1.243610 -3.83314 1.145480 -0.172442 -2.360910 0.271403 -0.357951
## 5 7.786440 -6.88359 5.737540 -0.726949 -6.030380 -7.258960 2.350180
## 6 -0.371498 3.70075 -0.705509 1.010940 -0.938047 3.868570 3.402900
## PC295 PC296 PC297 PC298 PC299 PC300 PC301
## 1 -3.4110900 1.686080 6.62375 -10.3170000 2.115910 0.321069 1.199770
## 2 -9.4855200 6.153120 17.61420 -7.2190300 7.198400 -6.962230 -5.653820
## 3 3.3814800 -4.143960 -5.74892 4.9096400 -0.910531 -0.102854 -0.283924
## 4 -3.0965900 0.839258 -4.59375 -0.0365626 -0.220522 -2.668270 -5.738610
## 5 -4.4456700 -0.248431 -11.56480 4.4448900 1.259200 0.233057 3.147440
## 6 -0.0810154 -1.332060 -1.32580 1.4609400 1.912250 1.705790 2.295310
## PC302 PC303 PC304 PC305 PC306 PC307 PC308
## 1 1.574640 -3.410510 -2.8918700 -3.23672 1.482110 -1.8217600 1.745490
## 2 0.802838 -8.660020 -1.3383400 -8.27628 3.236250 7.9197700 -3.593290
## 3 -1.958730 4.446000 0.7113250 5.66868 -0.847999 -4.1450500 0.267413
## 4 -0.458483 -0.995096 0.0814797 -5.66699 -2.117740 1.3477800 0.150716
## 5 -5.632880 -1.385670 9.9214900 -1.62767 -4.083950 6.1395500 -1.262320
## 6 -0.970871 0.859766 0.2588410 3.55760 1.765990 -0.0887573 -2.060870
## PC309 PC310 PC311 PC312 PC313 PC314 PC315
## 1 -3.918990 -0.167691 -1.184250 1.242380 -0.466074 0.353532 -2.221200
## 2 2.215080 2.951480 -1.747090 -1.625510 -1.582370 -2.256560 2.379820
## 3 -0.939395 -2.352220 -1.784810 4.128300 -2.899250 2.590090 -1.071720
## 4 3.228050 -0.940345 0.394989 -0.637514 0.110893 1.006150 0.615197
## 5 1.743200 5.374170 3.288830 -7.110870 0.716492 -0.481899 1.932740
## 6 0.246399 -1.196600 1.812030 1.838890 0.300388 -0.774491 0.605467
## PC316 PC317 PC318 PC319 PC320 PC321 PC322
## 1 0.352821 -2.220830 -2.11519 2.158240 0.247180 -0.4446400 -0.835695
## 2 -0.496456 -2.474130 -4.76329 0.395585 -0.520601 -1.4199800 -3.187130
## 3 0.979855 0.753731 2.23401 0.406704 -0.827127 -0.4565480 0.968956
## 4 -0.418606 -0.105097 -2.62185 0.286816 1.054780 -0.0905316 -0.621044
## 5 0.302910 -2.030700 1.58847 -3.183170 3.812870 3.7652200 -2.353640
## 6 -0.886637 0.871873 -2.42999 2.415860 -0.816579 -0.9468930 2.103610
## PC323 PC324 PC325 PC326 PC327 PC328 PC329
## 1 0.516407 0.357848 -1.338830 2.550150 1.7769500 0.640938 0.7280670
## 2 2.077730 -2.123860 1.529100 0.947236 -1.4292900 -1.574630 -0.2259680
## 3 -0.400698 1.644650 -0.458929 -0.531544 -1.1136200 -1.376580 -0.5154730
## 4 1.133430 1.358530 -0.616837 0.314308 0.0895645 -1.338060 1.1146800
## 5 3.226560 -3.635110 2.314460 -2.032180 -0.6470410 0.445676 0.0271391
## 6 -4.040430 -1.187590 -2.444880 0.737983 -2.7459600 -1.951170 -1.7823600
## PC330 PC331 PC332 PC333 PC334 PC335 PC336
## 1 -1.5942800 0.514835 -0.2979850 0.770455 1.289120 -0.0675534 0.7056970
## 2 -1.0438600 0.757701 -1.5047800 3.134780 0.646038 -0.2487530 -0.3816210
## 3 0.0645953 -0.122472 -0.2345380 -0.565257 -0.396688 -0.3370070 -0.3057790
## 4 -0.6303430 -0.454786 -0.0135467 0.381199 -0.143390 0.1891330 -0.0823058
## 5 -0.0344383 -0.600860 0.1210370 -0.596788 -0.330062 0.1136310 0.5483740
## 6 1.0374000 0.138874 0.1367710 0.643809 -0.335264 -0.4544160 -0.2970510
## PC337 PC338 Individual Pop_City Location Latitude
## 1 -0.1890310 1.22504e-06 1210 Alushta Alushta, Ukraine 44.68289
## 2 0.2466140 1.22504e-06 1209 Alushta Alushta, Ukraine 44.68289
## 3 0.1358210 1.22504e-06 1204 Alushta Alushta, Ukraine 44.68289
## 4 0.2101320 1.22504e-06 1211 Alushta Alushta, Ukraine 44.68289
## 5 0.0485408 1.22504e-06 1212 Alushta Alushta, Ukraine 44.68289
## 6 0.2411700 1.22504e-06 1213 Alushta Alushta, Ukraine 44.68289
## Longitude Continent Year Region Subregion order order2 orderold
## 1 34.40368 Europe 2021 Eastern Europe East Europe 43 35 35
## 2 34.40368 Europe 2021 Eastern Europe East Europe 43 35 35
## 3 34.40368 Europe 2021 Eastern Europe East Europe 43 35 35
## 4 34.40368 Europe 2021 Eastern Europe East Europe 43 35 35
## 5 34.40368 Europe 2021 Eastern Europe East Europe 43 35 35
## 6 34.40368 Europe 2021 Eastern Europe East Europe 43 35 35
Create a new bed file (since we didn’t make one specifically for these pops before)
echo "BER
PAL
GRV
REC
SIC
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_sicily_and_Americas.txt
Training done with euro_native pops for all italian + native +US
cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_sicily_and_Americas.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/euro_native_sicily_Americas \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/euro_native_sicily_Americas.log
100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.967089. 22537 variants and 287 people pass filters and QC.
genotype <- here(
"euro_global/output/neuroadmixture/euro_native_sicily_Americas.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 296
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 296
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM
## 12 12 12 12 2 12 12 4 7 12 11 4 2 6 12 11 6 4 4 9
## MAT OKI PAL QNC REC SIC SON SSK SUF SUU TAI UTS YUN
## 12 12 11 11 11 9 3 12 6 6 7 12 9
Convert format
##
## - number of detected individuals: 287
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.geno"
##
## - number of detected individuals: 287
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.removed file, for more informations.
##
##
## - number of detected individuals: 287
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.lfmm"
PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 287
## -L (number of loci) 22537
## -K (number of principal components) 287
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: euro_native_sicily_Americas.pca/
## input file: euro_native_sicily_Americas.lfmm
## eigenvalue file: euro_native_sicily_Americas.eigenvalues
## eigenvector file: euro_native_sicily_Americas.eigenvectors
## standard deviation file: euro_native_sicily_Americas.sdev
## projection file: euro_native_sicily_Americas.projections
## pcaProject file: euro_native_sicily_Americas.pcaProject
## number of individuals: 287
## number of loci: 22537
## number of principal components: 287
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 287
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native_sicily_Americas.pca/euro_native_sicily_Americas.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_Brazil_euro.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_Brazil_euro.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_Brazil_euro.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.92910 Americas BER 2018
## 2 Palm Beach USA 26.70560 -80.03640 Americas PAL 2018
## 3 Recife, PE Brazil -8.05882 -34.87810 Americas REC 2017
## 4 Gravatai Brazil -29.93760 -50.99070 Americas GRV 2018
## 5 Sicilia Italy 38.23294 15.55088 Europe SIC 2016
## 6 Kanazawa Japan 36.56100 136.65620 Asia KAN 2008
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 3 NA 77
## 3 South America 7 NA 81
## 4 South America 8 NA 82
## 5 Southern Europe West Europe 27 19 19
## 6 East Asia 51 43 43
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 33 Levels: BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC KAG ... YUN
## [1] 33
Check the regions
## [1] "North America" "South America" "Southern Europe" "East Asia"
## [5] "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BEN -27.9868 -9.0873 -5.55891 0.3080150 -4.01840 1.950530 3.70341
## 2 BEN -28.9441 -10.9597 -5.01098 1.1825100 -4.97300 1.453670 5.16611
## 3 BEN -29.4994 -10.2542 -5.25078 0.9689040 -4.89600 0.549813 5.03626
## 4 BEN -28.9520 -11.0038 -5.93855 0.5300450 -3.26018 0.449869 4.76751
## 5 BEN -27.9604 -10.0257 -6.17960 3.0958500 -5.52483 0.450500 3.86448
## 6 BEN -28.1620 -11.0876 -6.79601 0.0490528 -3.87185 2.094940 7.01968
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 5.91813 4.78740 0.745305 8.20893 -1.780890 1.193630 0.231945 0.0533398
## 2 6.55756 4.62976 -0.216794 9.46454 -4.079890 1.136330 -2.297950 -0.3697570
## 3 6.03396 4.77860 0.518998 10.77310 -1.772330 2.734290 -4.288900 -0.0944688
## 4 5.96438 5.84500 -2.590170 10.34560 -3.115270 0.439902 -3.548310 0.0491362
## 5 5.87321 4.78631 0.691438 10.57550 -0.243535 2.036400 -1.867820 -0.3295010
## 6 5.44803 5.07241 1.652590 10.82090 -3.716390 0.736772 -1.851240 1.8900500
## PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 -1.266420 -2.0139300 -0.433046 -1.49173 0.665996 -2.234730 12.42980
## 2 -1.574040 0.0860914 0.888112 -1.02298 -0.102479 -0.672590 15.03790
## 3 -0.454693 -1.8951400 0.232981 -2.60175 0.250769 -1.729150 8.66984
## 4 -2.215180 -0.2608700 0.119347 -1.50181 1.808150 0.394653 8.53026
## 5 0.359386 0.5562120 0.725415 -3.65824 -2.890880 -1.201060 11.23110
## 6 -1.534290 -2.1531400 3.053970 -2.79129 2.101390 -0.727313 19.58400
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 0.761672 -0.430870 0.8843980 -3.25450 0.221034 1.355870 5.25490 12.78410
## 2 -0.123448 -1.577800 2.7291000 -7.15095 1.562620 0.900525 7.84708 15.08510
## 3 2.441130 -1.166160 -1.3492000 -4.77287 0.799582 -3.100520 7.30012 6.15201
## 4 -2.386990 0.335299 -0.5935370 -2.94495 -0.888812 1.604380 6.77380 6.80868
## 5 2.278470 -3.814050 -0.0269465 -3.42005 0.858828 -1.198720 7.75474 7.40810
## 6 4.278960 -6.874380 -0.0964525 -6.43008 0.698959 -1.633610 26.79140 17.88970
## PC31 PC32 PC33 PC34 PC35 PC36 PC37 PC38
## 1 -0.973695 -7.17816 -1.2580800 5.45639 6.12411 1.844960 -0.72627 -3.9845500
## 2 -5.683380 -10.14920 -2.4827900 9.61246 2.69258 -1.401460 1.48955 -3.6300900
## 3 -1.281290 -3.98277 -1.7402000 7.08759 4.44985 2.863930 3.62264 -0.2127700
## 4 0.702757 -6.26064 -2.3531000 5.10378 2.14621 0.293538 -1.99579 -3.4623400
## 5 -1.515960 -1.97493 0.0676711 4.58086 6.38920 -2.295360 1.49959 -1.8260700
## 6 -3.443480 -15.79480 2.6084800 20.95110 9.83649 7.164940 19.11160 0.0241028
## PC39 PC40 PC41 PC42 PC43 PC44 PC45
## 1 2.084780 -2.580950 -1.46229 -3.00743 0.8819480 0.821013 1.214560
## 2 3.015540 2.452000 -3.83215 1.06354 -2.4413300 0.370019 -2.565900
## 3 0.593476 0.106879 -4.82031 -1.30717 -3.8806500 -0.142078 -5.594520
## 4 2.503040 -1.946390 -1.89825 -4.44304 0.0180848 -1.933410 0.213887
## 5 0.192741 -0.438637 -2.21437 -3.17785 2.4504800 -0.391757 1.153320
## 6 -14.030000 -6.934650 -3.42846 -5.15883 -13.4982000 -2.794790 12.950700
## PC46 PC47 PC48 PC49 PC50 PC51 PC52
## 1 1.731850 0.338732 -1.72754 -6.91383 1.611800 3.019810 -4.60060
## 2 0.942138 -4.192440 4.72635 -35.05700 -0.159692 3.503820 -15.10700
## 3 2.520570 0.169481 -5.61066 -7.63847 6.700730 0.712633 1.85182
## 4 2.225770 1.290450 2.18913 -12.07980 -0.383975 8.330670 -2.95165
## 5 0.717575 -2.566600 4.41802 -11.65780 -0.559683 1.512060 -2.33130
## 6 -4.136980 2.018580 -7.22710 43.38740 -9.305990 -9.569190 13.33710
## PC53 PC54 PC55 PC56 PC57 PC58 PC59
## 1 1.301540 1.267390 7.40905 -3.21041 0.336023 3.80185 5.058700
## 2 4.597570 0.246998 -3.07987 -12.70330 -4.772730 -10.49040 -2.739270
## 3 6.071460 -1.507940 -2.09003 5.76525 0.054409 -5.56212 1.479390
## 4 -4.515890 -3.524390 -4.72082 6.47438 0.412994 9.66145 4.740800
## 5 -0.326511 0.388013 -5.55859 -2.38799 -1.279610 4.98030 0.259202
## 6 -5.910610 -0.353015 4.46964 1.62015 -2.233240 -4.84299 -4.363080
## PC60 PC61 PC62 PC63 PC64 PC65 PC66
## 1 -1.838270 -0.110149 -8.01533 -1.04805 3.9015200 -6.403050 0.574366
## 2 12.417500 12.151800 13.89750 -4.64977 -0.7759550 -7.350880 -14.506300
## 3 1.571440 -4.368410 -9.85807 -1.26122 -3.2807800 4.514280 5.074280
## 4 0.846691 -2.129420 -4.80275 3.30404 -0.0934461 0.131633 3.005300
## 5 -0.923966 -1.596270 -4.75110 7.73719 0.2663180 -2.220510 3.099970
## 6 -2.201000 2.500710 8.43954 6.06728 -2.3796800 -0.612183 -4.167650
## PC67 PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 3.11166 7.06384 2.896360 -2.143830 7.90763 -0.919581 -9.54844 7.295890
## 2 -2.17393 -24.85840 -17.043700 -3.470070 1.39043 -9.937550 1.72217 -0.749985
## 3 -3.86727 10.38510 -0.182639 -7.325990 1.83799 5.435100 1.17335 -4.567540
## 4 6.03290 8.82299 3.478440 -4.369100 -3.11641 8.516430 1.48559 2.973250
## 5 -5.36433 6.41736 5.223680 4.939590 -1.12897 5.240490 -7.24477 -4.521190
## 6 5.41691 -7.45211 -2.709640 0.885797 -3.76283 -5.676240 2.79463 -0.300528
## PC75 PC76 PC77 PC78 PC79 PC80 PC81
## 1 -12.518900 -12.002800 0.89719 -1.947470 5.60450 6.537620 7.360060
## 2 -1.637810 1.012620 -2.34214 -5.134770 1.13798 -0.665655 1.642230
## 3 1.613300 0.113959 1.61805 7.079900 3.23799 -0.641635 -2.350940
## 4 -3.075040 3.164620 -13.13410 1.603080 -8.72153 -3.105100 3.082540
## 5 -0.130613 -5.797960 -5.74377 -3.906230 -1.50849 1.554610 -4.931390
## 6 -2.424430 -1.399860 3.58868 -0.778464 -1.80604 -3.448510 0.154695
## PC82 PC83 PC84 PC85 PC86 PC87 PC88 PC89
## 1 -5.367360 -4.00604 -7.329600 -4.19953 4.223190 8.26659 7.13485 -19.587100
## 2 -0.198981 -8.54468 4.860840 3.48384 -6.834450 2.00563 -1.04595 4.851030
## 3 0.421844 3.00855 -3.223160 -6.02851 7.984840 -6.36501 5.32659 4.553460
## 4 4.074450 10.47270 -0.273944 -5.55145 -3.133760 -4.46994 -6.40368 -7.740630
## 5 -1.618570 -2.29159 -8.813950 -11.84120 -2.246410 -3.93520 -6.05628 -8.451110
## 6 -2.894390 1.89023 -1.470960 2.86234 0.905114 -3.67370 -1.82196 -0.917497
## PC90 PC91 PC92 PC93 PC94 PC95 PC96
## 1 -4.94073 -5.423260 1.54224 8.422590 -4.24749 4.108610 -5.626930
## 2 2.53255 4.885750 -1.47199 3.210320 3.30140 1.734530 4.495000
## 3 1.56661 0.678582 4.30504 -2.308270 -8.55437 0.345432 1.777310
## 4 -4.80391 -8.116790 -11.83820 -4.069290 11.74910 -12.530700 -0.670313
## 5 -10.88650 -4.118770 4.33815 -0.544696 -7.68970 -3.157390 -8.540630
## 6 -4.42027 -0.323588 2.88592 -2.998070 2.80336 2.547970 1.810400
## PC97 PC98 PC99 PC100 PC101 PC102 PC103
## 1 4.930930 -0.725001 -7.0365300 0.2512860 -8.637080 0.775752 -3.31963000
## 2 -0.325766 1.759460 0.6433410 3.9173500 -5.630310 4.000010 0.00404051
## 3 -6.885840 -6.284380 1.6681600 -2.0952600 0.555313 -8.919030 -12.70480000
## 4 -4.775260 6.671960 -4.4419300 1.6787500 -12.835600 5.872480 13.07120000
## 5 -0.407980 3.778690 0.9572150 -2.7288600 7.319920 -2.905770 0.35116800
## 6 2.144340 -0.244524 -0.0807656 0.0877283 2.232350 2.678400 -2.36631000
## PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 -3.650890 10.365500 -0.110929 -5.179050 4.98707 2.065450 5.275890
## 2 6.632720 -2.665690 2.629080 0.344004 -7.90520 -2.711130 -1.770720
## 3 -6.874500 3.909680 -11.960800 2.307880 -1.23568 12.459000 -2.327750
## 4 -15.773100 13.539500 -6.509400 12.766100 -5.89163 -6.022990 -0.544698
## 5 -1.133730 -1.669730 6.815360 2.007520 5.99504 -4.309400 18.195300
## 6 0.937436 0.451469 1.582670 -1.641950 -3.37312 0.223767 -0.313039
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 -3.72262 -1.709440 -2.469530 7.388760 -9.91022 -1.4255400 8.345290
## 2 2.36369 2.119570 3.133310 0.555375 4.40154 0.2884900 -4.108450
## 3 2.59460 -5.405970 -18.265400 -0.543583 -7.95654 -8.7136600 -13.896900
## 4 -7.54909 8.035770 -1.318790 -3.488650 -7.59389 10.2802000 -2.675060
## 5 4.03111 5.095640 -1.588530 6.625040 -12.37840 2.0082100 10.941300
## 6 -1.03677 0.748991 0.429426 2.739340 1.38151 -0.0964276 -0.887215
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## 1 -3.70027 -3.00961 -5.858970 5.553240 -4.930520 -6.60648 -12.5941000
## 2 -3.06922 3.05055 -2.976410 0.413382 4.508610 -3.52480 -0.0575154
## 3 10.10280 6.86568 -1.122790 10.506900 0.766919 5.33264 -4.6897800
## 4 0.59914 4.29264 4.891670 4.334770 -4.625190 -15.18510 -1.9201200
## 5 5.97099 -6.98284 0.621585 9.133850 1.637910 9.59557 2.7198300
## 6 2.68469 1.10565 2.177320 1.387450 1.767020 -0.20282 0.7362430
## PC125 PC126 PC127 PC128 PC129 PC130 PC131
## 1 -0.712984 0.138293 2.179260 6.03463 -1.2302300 8.125010 4.019940
## 2 -8.047660 3.826760 4.767960 -3.71010 0.0897396 -3.211940 2.280000
## 3 3.550700 -13.411400 -2.037490 -1.88012 15.0846000 -0.590829 6.077670
## 4 0.783764 8.090510 -3.312490 4.13663 -5.4562900 9.836730 -2.596380
## 5 -0.642211 8.697430 2.798690 11.04680 -6.7470800 -8.821130 2.921880
## 6 -0.482564 -0.618477 0.197942 -2.13166 0.7780380 0.876103 -0.556115
## PC132 PC133 PC134 PC135 PC136 PC137 PC138
## 1 3.207600 6.039150 1.770420 -2.976580 5.722390 9.32636 1.20594
## 2 3.606010 -2.140560 2.982800 2.894330 -0.468215 1.56876 -1.75236
## 3 -5.079250 -10.075700 -0.874715 13.116200 4.294810 -10.52870 8.22391
## 4 -2.785890 13.811600 1.911940 5.512510 -7.301110 -3.36717 -17.63690
## 5 -3.748820 -13.804300 0.530970 -6.411690 -1.320520 -10.35680 1.31892
## 6 -0.657234 -0.500192 3.447490 0.214201 0.189449 1.46786 -1.16552
## PC139 PC140 PC141 PC142 PC143 PC144 PC145
## 1 -3.936410 16.697200 3.28420 13.08420 -3.691860 -11.328300 -1.21216
## 2 -2.301130 -0.411955 3.36125 -2.25490 -0.200263 1.258310 -2.04743
## 3 -6.910680 13.690800 -1.99014 -8.37847 -0.584530 -9.490540 -6.54467
## 4 3.855460 5.191490 -10.45740 -7.67927 5.319640 -0.540938 5.05316
## 5 22.110100 1.811170 -10.67750 -5.94812 9.515320 -1.534930 -2.17691
## 6 -0.817732 -1.910000 -1.11667 -1.32883 1.723810 1.022040 2.28221
## PC146 PC147 PC148 PC149 PC150 PC151 PC152
## 1 4.03896 3.4457400 -3.070150 11.881300 -19.45000 5.623070 0.579008
## 2 -1.10344 3.7740800 1.051590 -0.883642 2.79289 1.600340 0.925971
## 3 -5.08406 3.5452200 6.780100 1.708210 -4.63673 12.203800 -0.483891
## 4 4.32733 -0.0750739 -1.516440 2.209780 5.57357 -11.001000 -2.278020
## 5 -11.70380 6.2777200 -5.020820 4.939670 -8.60653 19.327900 -5.683860
## 6 4.13200 2.2194700 -0.423547 0.484201 -1.43745 -0.935855 1.025520
## PC153 PC154 PC155 PC156 PC157 PC158 PC159
## 1 -12.878000 1.033240 6.583560 -5.511640 2.589040 9.432300 -9.48998
## 2 -0.868274 0.937980 3.491970 1.037770 -3.169020 -0.586865 1.69525
## 3 10.101600 11.540300 -3.730950 6.569580 -8.149760 1.730820 -1.20677
## 4 15.152800 -0.495281 -21.212200 0.203603 1.680550 -2.091630 -2.92136
## 5 -9.435780 -15.492700 8.164590 -3.640940 -14.507700 -6.723570 3.23550
## 6 1.442670 1.980180 -0.817082 -3.189410 -0.441945 -4.215420 1.65176
## PC160 PC161 PC162 PC163 PC164 PC165 PC166
## 1 1.48242 9.15575 -2.397780 -3.449520 0.964108 -2.44514 -4.120880
## 2 3.60080 -1.69537 -0.585215 5.308250 2.814770 -1.34442 -0.173893
## 3 7.78513 12.72490 -2.229330 18.424300 0.644631 15.09080 14.597600
## 4 6.66010 1.06526 0.763251 -6.127380 -6.183860 2.32320 -2.998020
## 5 19.65440 -8.62961 2.344090 -15.537600 -2.883270 11.85370 13.854700
## 6 1.03209 -0.41453 -0.983646 0.196306 3.261320 -2.42458 0.314184
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## 1 -10.292100 -20.124500 -11.30100 6.344640 -9.368680 -9.728120 5.140010
## 2 -0.900305 3.355870 -3.16255 2.574450 -1.985830 0.575674 -0.566387
## 3 0.058153 9.812130 -7.17513 -17.631000 7.983690 -22.290300 6.855110
## 4 8.613030 0.182716 -1.71216 -13.120900 0.202328 -4.317990 -5.667270
## 5 9.073690 -16.238200 14.37750 11.516000 -5.280630 3.581040 9.958980
## 6 0.435482 -0.579278 1.11392 0.925707 -1.761120 1.946030 -0.497627
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## 1 14.92830 3.119610 -1.34525 -2.755000 1.56895 -17.358100 11.031600
## 2 -1.62261 0.694167 4.16878 1.264910 -5.24309 -2.706600 -7.004110
## 3 -11.57420 -2.765520 -6.85045 6.161230 -1.45467 4.007950 6.536340
## 4 -3.85618 2.681660 3.88404 9.843810 10.11240 0.767019 -9.671400
## 5 -24.02330 -5.464220 -4.16733 2.513230 -3.23209 12.529500 -4.566630
## 6 1.67146 -0.215576 1.81306 0.744773 1.85090 -2.732590 -0.570049
## PC181 PC182 PC183 PC184 PC185 PC186 PC187
## 1 1.426210 -2.489690 7.05421 26.189400 15.125300 -2.040030 8.853190
## 2 1.395950 -0.247367 -1.89733 -2.824270 3.304820 3.515820 2.020320
## 3 9.027280 -2.364400 -1.18158 -0.588440 0.900796 1.009520 -6.877080
## 4 -14.645300 -28.803600 12.04750 2.608000 12.728400 7.282120 9.027220
## 5 -4.395440 6.129290 -9.43538 -2.951500 -4.524190 0.728390 -1.945460
## 6 0.171567 -0.936474 -1.38738 -0.705347 -1.442620 -0.601111 -0.754935
## PC188 PC189 PC190 PC191 PC192 PC193 PC194
## 1 3.12938 16.26000 15.275500 1.782970 3.106490 -1.823070 0.0535326
## 2 -3.35918 -5.69236 -1.393210 -3.975370 2.615380 -0.337273 -1.6417100
## 3 4.64530 -15.78190 9.423800 6.103940 -4.275900 -8.049880 -9.3616900
## 4 -5.91934 -12.61830 -5.278100 9.547440 1.186180 -4.708890 -10.4498000
## 5 -2.90288 2.97822 -6.992460 -0.699433 -6.657050 3.233220 3.5032700
## 6 2.90335 -3.10696 -0.884288 -3.230100 -0.619537 1.818470 -2.2591600
## PC195 PC196 PC197 PC198 PC199 PC200 PC201
## 1 -3.263470 10.1692000 2.403610 -5.660060 -2.732850 7.8384500 -0.116071
## 2 3.887430 1.8286500 -0.522081 -1.813420 -1.187670 1.5394200 -0.753382
## 3 -0.441351 0.6416220 -20.740500 -3.670630 5.011280 0.0746461 1.994490
## 4 -2.037770 -0.4025780 12.728900 0.179160 3.776370 -5.4642600 -4.106600
## 5 -1.025930 3.6759300 7.863650 4.437090 2.708850 -3.2536500 -8.460940
## 6 -0.880249 0.0849061 1.934600 0.685482 0.757912 -2.6629600 1.759240
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## 1 3.6195000 -3.707560 0.0158396 5.839080 2.229270 -11.10690 3.26447
## 2 -0.7886260 0.291429 1.5528800 3.604630 -7.745430 5.85148 1.09819
## 3 6.0006600 -0.272915 -1.8461100 4.245150 7.416220 2.76945 -1.41021
## 4 -4.8589000 2.895360 4.4053400 -4.043130 -4.733780 4.70966 3.95704
## 5 -1.7367400 2.460220 2.9588000 -6.624050 1.281920 1.03362 1.74956
## 6 0.0605942 -2.286350 0.6293900 -0.634392 0.914047 3.41830 1.46717
## PC209 PC210 PC211 PC212 PC213 PC214 PC215
## 1 1.18134 -2.69620 -1.6968100 5.1375900 4.85852 -0.597896 -2.6557600
## 2 2.63148 0.99805 -5.2046200 0.3573960 4.80028 3.544460 4.8880300
## 3 -1.67602 3.08015 -0.0645954 0.9254070 -7.90074 2.591360 -4.0078500
## 4 -4.74794 3.56340 0.6744180 2.0860900 6.77795 -1.808880 4.4124700
## 5 3.43979 -7.08848 1.0160100 -4.9488000 -2.88778 2.243840 -0.0372929
## 6 -2.69538 -2.44311 -1.7924500 0.0184598 1.20241 -0.671993 -1.5522000
## PC216 PC217 PC218 PC219 PC220 PC221 PC222
## 1 4.49055 0.637331 -2.365800 1.949140 -5.035390 -1.548830 -0.838372
## 2 -3.86744 -3.500280 0.808693 0.195691 -8.131690 4.254230 8.771050
## 3 -2.61305 1.392310 0.314417 -2.420570 -2.270040 1.684090 -0.530963
## 4 1.00933 2.606290 -0.528731 -1.362910 1.269370 -0.508426 0.563710
## 5 -3.48988 -2.834740 1.543290 -1.324410 -0.419488 -1.843760 -0.369917
## 6 -2.04589 -0.786415 2.031060 -0.669019 -0.880694 -0.589158 -1.018700
## PC223 PC224 PC225 PC226 PC227 PC228 PC229
## 1 0.954695 2.45351 -0.737650 0.415769 0.785759 -0.651546 1.36670000
## 2 18.281000 12.17320 16.524000 -26.419500 8.788070 5.993790 20.42990000
## 3 0.409464 1.66778 0.934650 -2.862440 1.102900 0.689047 -0.00841803
## 4 -0.886546 1.23434 -4.179950 0.117267 -0.187612 -1.101560 -1.69233000
## 5 1.443750 1.37715 0.159855 0.977391 -0.696971 -0.191525 2.62075000
## 6 -2.979680 -1.21344 1.280080 1.205850 -0.393115 -1.905210 -0.98067600
## PC230 PC231 PC232 PC233 PC234 PC235 PC236
## 1 1.334460 -2.3155100 -1.345160 2.042100 -2.138660 -2.404690 0.4979810
## 2 -3.639970 -13.8845000 -18.269700 -1.052180 -5.245630 -2.464180 -7.2019100
## 3 -3.931810 -1.3872000 3.465830 1.637310 2.006790 -1.720370 -1.4247600
## 4 -0.240021 1.1555300 -1.110210 -0.914095 0.403332 -0.452953 0.0840436
## 5 -0.862870 0.6809950 -0.584831 -1.733090 2.094720 2.020550 0.2379360
## 6 -1.438750 0.0388905 -0.319700 -0.722855 -0.594278 1.125490 -0.2412180
## PC237 PC238 PC239 PC240 PC241 PC242 PC243
## 1 1.132170 0.828952 -1.0402100 -0.534313 0.166768 -0.1861520 -0.853036
## 2 -0.560467 -0.238514 -0.5820710 -5.140480 1.185240 -3.8792300 -2.995210
## 3 -0.648752 -0.296555 -0.3478960 -1.963030 1.805890 -2.2796300 -1.737720
## 4 -0.382944 -0.430719 -1.1353500 -0.777930 0.475309 -2.5282900 -0.684651
## 5 0.651394 0.744220 0.0405264 -0.109277 2.219380 -0.0243305 -1.720880
## 6 -0.623646 3.208910 0.9541860 -2.308850 0.952111 -0.9234700 -7.028690
## PC244 PC245 PC246 PC247 PC248 PC249 PC250
## 1 -0.549459 -1.676700 1.442640 0.388364 -1.585570 0.0158327 -2.068450000
## 2 -3.520200 3.064050 -4.494790 -2.666930 -0.398643 1.3191100 -1.880360000
## 3 0.908814 -0.526994 0.646026 1.772390 0.192232 -0.2334130 -1.445990000
## 4 0.905766 -1.366530 -1.241710 -0.480329 0.339002 -0.0926394 -0.000228799
## 5 -3.184300 -0.604245 0.766963 1.316780 -0.886905 1.7635700 0.218820000
## 6 0.404846 -1.935070 -7.452530 0.947396 -3.893770 10.5260000 3.845080000
## PC251 PC252 PC253 PC254 PC255 PC256 PC257
## 1 -0.0909045 -1.286410 -1.476590 0.445361 -0.454673 -1.279170000 0.616078
## 2 -0.1960620 -2.804130 0.797743 -2.514610 -2.363580 -0.095094100 -2.614910
## 3 0.0338227 0.516131 -0.875077 0.272255 0.168574 -0.119842000 0.553969
## 4 -1.1658700 -0.141914 0.708533 -0.702764 -0.613669 0.000595737 0.639629
## 5 -0.1425980 1.286430 -1.024210 1.305400 -0.363115 0.340160000 0.209637
## 6 0.6705600 -1.752760 -0.262442 -1.252340 -8.706210 -2.363270000 -20.676100
## PC258 PC259 PC260 PC261 PC262 PC263 PC264
## 1 -0.47031100 0.2438470 -0.5043690 0.226510 -0.4418150 -0.8708950 0.799262
## 2 -1.90755000 -2.1152500 -1.0091900 0.545371 -1.4976800 -0.0525219 0.423708
## 3 0.00727958 0.7054270 -0.9570060 0.957102 0.7390040 0.1333230 -1.466320
## 4 -0.34952200 -1.0184900 -0.0294914 -0.571801 -0.0639744 0.5144190 0.160258
## 5 -0.66157300 0.0231993 0.5241020 0.778208 -1.0389300 -0.4784070 -0.991843
## 6 38.03930000 1.1037000 -5.4512800 0.209399 -3.9660400 -3.2770000 5.910630
## PC265 PC266 PC267 PC268 PC269 PC270 PC271
## 1 0.3961940 -0.365417 -0.1131220 0.0399503 0.318304 0.42435600 0.427372
## 2 -0.0778169 -0.693591 0.7942950 -0.5593870 -0.252321 -0.72199200 -0.143000
## 3 -0.9421380 -0.734953 0.0518940 -0.3157790 0.119161 -0.08887840 0.474355
## 4 0.1291820 -0.220107 -0.0933386 -1.0670800 0.603698 -0.00707304 0.595090
## 5 0.9881800 0.161395 0.3287930 0.5240300 0.365007 -0.28078300 0.133682
## 6 4.4098800 -1.617270 2.4468000 1.9637500 0.818310 0.98260300 0.898526
## PC272 PC273 PC274 PC275 PC276 PC277 PC278
## 1 0.8898670 -0.5320180 -0.202373 0.1221180 -0.0494071 0.1407310 0.2934410
## 2 0.1501440 0.1468680 -0.118874 0.3131190 0.4881750 0.7204670 0.0129602
## 3 0.4355940 -0.1591700 -0.349746 -0.2655820 -0.4097860 0.2029200 0.2233030
## 4 -0.3921040 0.0654559 0.327395 -0.0158294 0.5578660 0.3354980 -0.2985050
## 5 -0.0447267 -0.8947430 0.194515 0.2834500 0.5954340 -0.0214011 -0.2369820
## 6 0.9333370 0.5134990 -1.294610 -0.5516730 -1.3518900 1.9478600 1.0588200
## PC279 PC280 PC281 PC282 PC283 PC284 PC285
## 1 0.378642 -0.2942070 -0.0961943 -0.4393570 -0.00963723 -0.1411430 -0.3151240
## 2 0.262467 0.4056820 -0.7283000 -0.5717360 -0.41356500 0.0274373 -0.1701970
## 3 0.338803 0.0757141 -0.1722910 0.4152430 0.15766400 0.6326340 0.0471803
## 4 -0.687666 -0.6938590 -0.1059360 0.0656018 0.69531300 0.0811386 0.2997780
## 5 0.288014 -0.2440200 -0.1515450 0.6560290 -0.03491080 0.3181510 -0.1604810
## 6 -0.865197 0.0494245 -0.3556250 -0.5837550 -0.13014400 -0.3389760 -0.2589450
## PC286 PC287 Individual Pop_City Country Latitude Longitude
## 1 -0.00890661 1.37433e-06 266 Bengaluru India 12.9716 77.5946
## 2 -0.07037630 1.37433e-06 255 Bengaluru India 12.9716 77.5946
## 3 -0.00793918 1.37433e-06 256 Bengaluru India 12.9716 77.5946
## 4 -0.22101200 1.37433e-06 265 Bengaluru India 12.9716 77.5946
## 5 0.07046310 1.37433e-06 261 Bengaluru India 12.9716 77.5946
## 6 0.01113450 1.37433e-06 262 Bengaluru India 12.9716 77.5946
## Continent Year Region Subregion order order2 orderold
## 1 Asia Unknown South Asia 60 52 52
## 2 Asia Unknown South Asia 60 52 52
## 3 Asia Unknown South Asia 60 52 52
## 4 Asia Unknown South Asia 60 52 52
## 5 Asia Unknown South Asia 60 52 52
## 6 Asia Unknown South Asia 60 52 52
genotype <- here(
"euro_global/output/neuroadmixture/native_iberia_Americas2.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 340
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 340
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BAR BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN
## 12 12 12 12 12 2 12 12 4 7 12 11 4 2 6 12 11 6 4 4
## LAM MAT OKI PAL POL POP QNC REC SON SPB SPC SPM SPS SSK SUF SUU TAI UTS YUN
## 9 12 12 11 2 12 11 11 3 8 6 5 8 12 6 6 7 12 9
Convert format
##
## - number of detected individuals: 331
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.geno"
##
## - number of detected individuals: 331
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.removed file, for more informations.
##
##
## - number of detected individuals: 331
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.lfmm"
PCA for MAF 1% r2<0.01 snp set of euro_native2_albania_croatia_greece_US
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 331
## -L (number of loci) 22537
## -K (number of principal components) 331
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: native_iberia_Americas2.pca/
## input file: native_iberia_Americas2.lfmm
## eigenvalue file: native_iberia_Americas2.eigenvalues
## eigenvector file: native_iberia_Americas2.eigenvectors
## standard deviation file: native_iberia_Americas2.sdev
## projection file: native_iberia_Americas2.projections
## pcaProject file: native_iberia_Americas2.pcaProject
## number of individuals: 331
## number of loci: 22537
## number of principal components: 331
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 331
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_iberia_Americas2.pca/native_iberia_Americas2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.929100 Americas BER 2018
## 2 Palm Beach USA 26.70560 -80.036400 Americas PAL 2018
## 3 Recife, PE Brazil -8.05882 -34.878100 Americas REC 2017
## 4 Gravatai Brazil -29.93760 -50.990700 Americas GRV 2018
## 5 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 6 Loule Portugal 37.09084 -8.092465 Europe POL 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 3 NA 77
## 3 South America 7 NA 81
## 4 South America 8 NA 82
## 5 Southern Europe West Europe 11 3 3
## 6 Southern Europe West Europe 12 4 4
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 39 Levels: BAR BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC ... YUN
## [1] 39
Check the regions
## [1] "North America" "South America" "Southern Europe" "East Asia"
## [5] "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BAR 25.5499 2.001850 32.4190 7.67455 -25.4593 -4.67251 13.7777
## 2 BAR 25.4107 0.354687 31.4114 5.91305 -20.6103 -7.52188 21.6435
## 3 BAR 21.9004 -1.111830 31.3627 5.46695 -22.3339 -6.51403 19.6255
## 4 BAR 22.5679 1.277120 33.8145 6.21831 -25.4711 -6.89546 20.8575
## 5 BAR 24.4462 -1.403420 29.1115 6.30322 -19.0357 -5.08700 14.6359
## 6 BAR 25.3203 0.761056 34.6690 7.37388 -25.1849 -6.13724 17.6580
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 3.40690 -0.733271 -9.67442 -15.9058 18.0216 6.32654 5.98767 3.41399
## 2 5.84533 -2.214230 -12.85460 -19.9574 17.3894 6.11969 7.98925 1.07950
## 3 9.17413 -2.425720 -9.48653 -17.5797 18.6901 9.58027 5.83143 2.08171
## 4 8.02944 -3.644450 -12.77820 -19.6965 21.8319 10.70570 6.00047 5.04656
## 5 2.53529 -1.993170 -7.14319 -17.3004 14.1459 7.36393 5.16066 4.08381
## 6 8.71096 -1.058250 -12.28910 -19.4578 17.8304 7.96268 7.49455 2.73509
## PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23
## 1 -7.16208 7.71709 4.41331 -2.754290 1.854080 2.129810 0.884887 1.293300
## 2 -6.41903 7.30044 5.32912 3.931020 -1.013790 -0.234323 -8.631420 -2.862340
## 3 -3.89952 11.34300 4.84956 2.312330 -3.679250 -2.301880 -6.299350 -0.498285
## 4 -4.11928 10.77560 8.72978 1.012840 -0.646801 -2.926890 -7.660640 -0.725622
## 5 -3.15545 6.72153 4.27435 0.619417 -0.920164 -3.943620 -5.897880 -3.275340
## 6 -9.46860 11.47150 4.60506 -2.486380 0.665044 3.262860 -2.738350 2.407820
## PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31
## 1 -2.23555 3.980410 0.479427 -0.130636 0.0865897 -2.361130 0.557199 0.316978
## 2 2.34082 2.598980 0.134286 2.492170 4.3267900 -4.631260 1.770090 -0.939229
## 3 5.09459 0.749207 0.836163 -1.730430 -0.4442760 -2.515980 -0.942403 4.041180
## 4 3.40329 2.569730 4.961780 -4.176320 -0.1042140 -0.131276 -0.999407 -1.027160
## 5 1.23383 1.401020 2.345880 4.706740 3.5340900 -3.955960 -2.782230 2.525790
## 6 4.66316 4.781560 0.827205 -0.737351 -1.4653200 -3.554900 3.813600 -0.341777
## PC32 PC33 PC34 PC35 PC36 PC37 PC38
## 1 -0.4444750 -3.15818 -2.83666 4.82432000 -0.997399 1.21471 -5.746860
## 2 3.5647700 1.54800 -3.88493 9.35334000 -1.017370 -6.12165 6.997830
## 3 -2.4151800 -1.41287 4.46832 -7.77748000 -1.808960 8.18154 -0.910979
## 4 0.0760186 3.24226 2.29609 -2.64337000 4.722200 -1.96688 -1.398860
## 5 -1.2545300 2.59171 -4.77755 0.00356112 5.917210 -4.51056 -3.554590
## 6 -1.9315000 2.22768 -1.22077 -2.63133000 0.270552 6.88953 -4.599620
## PC39 PC40 PC41 PC42 PC43 PC44 PC45
## 1 -0.0614424 -10.545000 7.883870 6.857250 -3.2807000 2.47440 16.497700
## 2 -3.3119900 0.578622 2.470320 -6.460780 -4.1464900 2.14022 -7.894510
## 3 4.8478600 1.088610 -3.357980 -0.298669 0.0652313 -1.38465 -0.620771
## 4 -0.8465660 9.940250 -8.551070 -4.867380 5.7667200 -7.29192 -7.406230
## 5 -3.6516100 -4.192260 10.120500 -1.745520 -0.0524046 4.05158 12.631500
## 6 0.9997290 -2.663760 0.210486 3.010410 -3.9270600 3.11524 3.306900
## PC46 PC47 PC48 PC49 PC50 PC51 PC52 PC53
## 1 8.22976 6.59451 8.11986 5.01540 -4.40843 -0.740971 -15.62990 9.878280
## 2 3.88213 -6.63139 -3.31186 -9.77056 8.51794 -2.449940 7.63387 4.434340
## 3 -8.34699 -11.30220 -7.68358 12.71820 -20.46040 -2.483860 -21.09400 -21.488000
## 4 -7.00947 6.08540 -7.22786 -7.66642 11.90510 2.748040 16.59100 -10.143900
## 5 5.70632 20.08740 3.50690 -4.87685 18.25830 5.788220 13.66970 16.963500
## 6 5.80395 -7.31314 2.43073 11.88560 -19.00440 0.178450 -13.99530 0.764454
## PC54 PC55 PC56 PC57 PC58 PC59 PC60
## 1 -5.57121 -15.027800 -13.90860 -4.094810 4.21689 -10.382200 -4.29901
## 2 3.36386 -4.893800 2.43052 4.229120 -5.20076 1.254800 -8.31485
## 3 3.95256 6.789440 1.51764 -5.220900 8.97183 0.382508 7.51565
## 4 5.02958 18.559000 15.71960 3.405140 -6.72767 7.536590 9.78866
## 5 -15.53990 0.428366 -12.44660 -2.525550 -6.14423 -4.232770 6.26329
## 6 2.29198 -10.959400 -3.82909 0.495758 5.25124 3.543490 -6.27881
## PC61 PC62 PC63 PC64 PC65 PC66 PC67
## 1 -1.09797000 -4.804900 -0.156622 0.683491 -2.260210 4.234910 -3.649060
## 2 0.00490617 10.205300 -0.668617 -8.417400 -1.931710 3.357220 -5.219050
## 3 1.20415000 -0.623453 0.974949 10.128200 4.281520 -10.625900 9.376110
## 4 3.38923000 -4.474070 -3.546520 -1.106340 -0.252152 -3.496220 1.515490
## 5 -1.17010000 -16.055000 0.699717 -2.160300 2.908200 2.509440 -0.379699
## 6 0.01844000 5.739770 -9.486850 2.533710 -10.111700 -0.978215 -6.956280
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 -1.574830 -5.243250 -1.403580 -12.63960 1.356470 -8.12799 0.859371
## 2 -0.278632 0.894293 -3.828950 5.62895 -1.293390 1.69540 4.779710
## 3 8.166690 -4.691220 3.184360 -9.13291 -0.734180 4.67734 2.345690
## 4 0.836239 8.973690 -6.029360 5.15928 -2.319940 4.61097 -1.591360
## 5 -2.314620 -3.092170 10.263800 3.76500 -4.196360 -18.39850 -3.893150
## 6 8.467670 -3.522660 0.544784 -13.40720 -0.114845 1.46456 12.050100
## PC75 PC76 PC77 PC78 PC79 PC80 PC81 PC82
## 1 6.572720 -1.970600 -6.48878 2.12883 -8.44218 2.83437 10.09980 -6.658690
## 2 0.289025 -10.742600 9.31079 2.62550 -5.67173 0.65423 -9.82558 6.597310
## 3 -4.700500 0.240791 -9.46227 -1.55213 2.30165 5.40084 1.85246 -0.882662
## 4 -3.918090 4.068350 -4.40486 -2.23271 12.17960 -2.56363 -9.31355 0.260666
## 5 4.600820 9.808770 4.18361 1.28728 -7.80091 -5.49614 10.93040 -3.438530
## 6 -5.969230 -10.239100 -4.05508 2.52897 -6.26424 -6.24197 2.27204 -0.564218
## PC83 PC84 PC85 PC86 PC87 PC88 PC89 PC90
## 1 -2.86887 14.001600 0.957331 -1.87941 4.64860 0.657698 0.399883 2.022860
## 2 -2.41667 0.050327 2.596270 1.67471 -2.48395 -1.396320 -4.257030 -2.291260
## 3 2.37558 -2.717030 0.846448 3.65477 -3.07108 -2.243730 3.539840 -0.099632
## 4 3.51192 -10.564200 -0.569494 -2.68905 -2.09208 5.139930 6.494860 5.454050
## 5 -2.26074 13.791700 3.296180 -5.23505 8.30755 -4.069930 -6.242270 -4.136490
## 6 -2.48147 10.155800 0.622089 4.60028 -1.74139 5.396000 2.946920 -2.739450
## PC91 PC92 PC93 PC94 PC95 PC96 PC97
## 1 -7.621980 0.347251 -7.679630 -4.570580 6.94600 2.55569 -6.281100
## 2 6.124300 -6.053970 5.249970 9.215560 1.07701 -3.67124 -7.650710
## 3 -0.821424 6.222880 -5.285170 -1.756490 -2.14933 -1.85051 0.284107
## 4 0.316467 -1.141910 -3.056630 -1.073980 -10.11590 3.00487 5.355180
## 5 3.806260 0.314570 0.591955 -9.671800 14.58320 2.33464 -1.780890
## 6 -3.593550 -4.794020 -7.208490 -0.857269 1.25426 -2.20382 -2.009490
## PC98 PC99 PC100 PC101 PC102 PC103 PC104
## 1 -10.40770 -2.110000 -9.424470 -1.839250 1.535390 -5.510170 -9.217160
## 2 1.88745 -0.834114 -2.570220 0.206347 4.293290 0.302222 1.592700
## 3 1.58134 -1.053580 6.435840 5.676030 -3.411820 -0.304258 -0.429984
## 4 8.50590 1.367650 0.608194 1.623170 -1.206740 7.056930 3.993230
## 5 -6.29561 -4.134710 -1.820040 -6.111940 0.997718 0.208238 4.110780
## 6 -2.58204 3.409730 0.152236 1.872320 -0.201032 -4.711020 -6.356940
## PC105 PC106 PC107 PC108 PC109 PC110 PC111 PC112
## 1 6.079390 1.64507 5.043130 -1.280310 -5.12226 2.400870 -2.93166 -2.596930
## 2 -6.858940 4.49297 10.819200 1.935290 1.18597 -3.354460 -3.12687 5.176590
## 3 1.402610 5.83262 -8.318620 -0.867838 4.82371 -2.783900 0.98931 -0.873792
## 4 0.540892 -8.13902 2.633320 2.897030 -2.00770 4.063370 5.14743 -4.799910
## 5 -2.495470 -2.72289 -0.883392 -1.856300 -3.07880 -1.573520 -5.40047 3.153300
## 6 2.410270 3.76763 0.650305 5.733720 -1.65211 -0.793212 -1.72445 -6.884940
## PC113 PC114 PC115 PC116 PC117 PC118 PC119
## 1 2.2578800 3.88597 2.867630 0.539158 -0.845930 -5.403780 2.784410
## 2 -4.3606400 -0.92686 -7.133760 -4.580270 -3.141370 -2.555780 -6.057470
## 3 -0.0760097 4.41340 2.063490 4.869730 1.861820 5.288130 -0.360741
## 4 -2.7829100 -2.09383 1.375930 -2.798820 -0.927025 -0.118288 8.700330
## 5 7.6067200 3.72398 2.286540 3.436420 1.227490 0.349492 0.649339
## 6 -0.9041910 -2.10562 0.943164 -0.286257 -4.915870 -1.528040 2.788780
## PC120 PC121 PC122 PC123 PC124 PC125 PC126 PC127
## 1 -4.91608 2.93566 -8.418820 3.80704 -1.52167 -5.461930 2.234070 -2.08119
## 2 -5.82860 2.04339 7.237420 1.77367 5.72487 4.807260 0.455513 -4.55348
## 3 1.93994 -6.71502 5.527480 -1.24756 -6.20322 -2.739320 -1.555350 2.43503
## 4 -4.21144 -2.04098 -5.836880 -6.76454 -2.89093 0.808653 0.486359 3.37248
## 5 6.23956 -3.65780 9.455560 1.56276 2.35529 -6.507030 -3.885540 -1.85520
## 6 -6.83929 3.39949 0.274695 -3.51281 -1.19905 5.340580 -0.898033 -5.43983
## PC128 PC129 PC130 PC131 PC132 PC133 PC134
## 1 -2.68327 -2.549630 9.401090 0.683652 -1.271010 -11.416600 -1.922070
## 2 9.75043 -0.917970 -0.372633 1.562830 -1.881060 4.285940 4.032880
## 3 2.38452 -5.374380 -3.196170 -2.384930 2.686380 4.526360 -2.830820
## 4 -11.87540 4.469200 -7.035340 -3.212530 -3.742870 8.108840 2.652020
## 5 4.62774 0.716657 1.085520 3.138260 0.723081 -5.909650 0.225585
## 6 -2.55966 -3.140090 3.077420 -0.500853 3.364680 0.357091 5.086970
## PC135 PC136 PC137 PC138 PC139 PC140 PC141 PC142
## 1 -4.87730 2.26513 0.262335 4.62478 5.99020 4.339540 0.189673 -3.939540
## 2 8.17511 1.93598 5.514280 1.03692 -1.33778 -6.677280 1.741950 0.921766
## 3 4.24522 -5.15313 0.201977 -6.31506 -1.37553 1.129400 -5.531570 3.147510
## 4 -5.15336 2.21695 -2.338480 -5.61432 -8.98571 -1.644590 4.369480 -1.193880
## 5 -2.41895 3.90826 3.841930 4.90093 14.33500 0.260579 -5.722460 7.555040
## 6 2.18485 -3.69796 0.569709 -1.83564 -6.22962 5.775860 -4.806010 -6.111940
## PC143 PC144 PC145 PC146 PC147 PC148 PC149 PC150
## 1 -0.699479 -2.504360 -2.254420 -3.395470 4.77453 -3.15509 -1.713990 3.44239
## 2 -2.513140 -0.844755 -3.618760 -3.942080 5.65364 -5.38484 -3.114800 -4.41353
## 3 5.840610 -1.219760 -8.340430 5.265660 3.93128 3.53840 5.863480 7.40169
## 4 2.002100 -1.773520 5.916870 -2.958120 -11.15000 -1.58413 -5.806020 -5.48468
## 5 -5.756550 4.389890 4.857280 6.174350 -4.38367 -1.28658 0.225397 -2.82186
## 6 4.348170 0.796501 -0.205014 -0.827709 -4.34729 -5.21046 -1.004390 4.42634
## PC151 PC152 PC153 PC154 PC155 PC156 PC157 PC158
## 1 2.118480 4.28212 -0.317366 -1.457550 1.15876 -1.83845 0.677561 2.787140
## 2 -4.782600 4.04689 -0.657632 3.414950 3.59248 1.42896 0.855985 0.960586
## 3 -0.398366 -1.25384 -2.677050 0.337193 -4.41156 -3.83077 -0.899710 0.143709
## 4 -4.051750 3.49544 9.373580 -3.702670 2.50726 -3.59417 -3.470970 -2.842710
## 5 0.403745 1.09411 -5.127360 -6.036350 -5.27839 7.88031 0.981945 -4.104420
## 6 2.459710 1.82108 3.974530 -0.626101 2.53644 -6.06100 -0.463329 2.182540
## PC159 PC160 PC161 PC162 PC163 PC164 PC165 PC166
## 1 2.61953 2.776970 6.499190 -3.17932 -1.26286 -5.81216 -3.177150 -1.86669
## 2 -2.04183 -3.528650 -5.698820 -4.64500 -4.59376 -5.62974 2.897230 -1.27826
## 3 -4.13686 0.359589 2.721880 2.89826 2.82899 7.24960 0.378805 -6.48326
## 4 2.79463 -1.328860 -0.184743 1.14260 1.50184 4.79097 -4.914690 8.68172
## 5 1.95661 -2.774440 1.031620 -0.31542 5.01982 -1.95284 -2.344740 8.01955
## 6 -2.27209 -2.122670 -2.977810 -1.96589 3.56997 3.67382 7.778470 -1.85858
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## 1 -0.0803253 2.848890 -2.162710 4.967950 -3.9800100 -4.861580 -1.59772
## 2 -3.7661300 -3.614570 -4.534350 -0.843756 -5.4733700 7.792640 -1.93847
## 3 -0.6931030 -1.764910 -1.017960 -3.551750 -2.6722700 -0.845494 -4.76455
## 4 3.4457900 -4.300140 5.789650 -0.429008 0.0647389 -7.637510 -1.81966
## 5 -0.7328930 0.499075 -0.141274 0.987371 14.8210000 5.370970 1.26637
## 6 4.7712000 -0.928789 -2.991940 -4.036390 -5.7398300 -5.186370 -3.25269
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## 1 9.62203 3.794880 7.310290 1.446480 3.169850 2.729350 -5.314270
## 2 -2.25811 14.894800 -5.213880 -4.870530 -2.762820 4.817930 3.516820
## 3 5.99955 2.589690 -1.611290 7.348660 -0.456571 2.428020 4.821800
## 4 -7.29503 -9.533150 0.415911 -8.096380 2.315460 -2.030240 -4.813670
## 5 -4.08710 -4.051960 2.816590 0.522548 5.103060 0.550644 -0.870163
## 6 -4.15292 -0.687135 -0.908193 -0.298223 2.353910 2.230670 3.742080
## PC181 PC182 PC183 PC184 PC185 PC186 PC187
## 1 3.4773000 4.92234 -0.658278 4.7902500 0.790756 5.81346 -1.153170
## 2 0.0253924 -6.73912 -0.494408 1.9940900 -4.615020 6.09937 -0.101715
## 3 -5.7484900 -2.87637 2.330840 -2.9863900 2.284260 5.20689 -2.454970
## 4 10.4513000 7.54592 3.610410 6.0622700 3.310300 -6.91275 1.656040
## 5 -1.4731600 1.22017 -6.339070 -7.6392100 -4.212780 1.59322 0.115176
## 6 -1.2248000 -2.79284 -2.296190 0.0286201 -3.590650 3.76164 1.137250
## PC188 PC189 PC190 PC191 PC192 PC193 PC194
## 1 4.5866100 0.486935 3.98583 1.18599 -0.239188 -7.5906200 -1.54424
## 2 -10.3407000 -2.671190 4.26308 -3.43792 2.857570 1.0492400 -3.08940
## 3 0.1671920 -6.557360 -9.36124 10.18680 1.382550 0.0706454 -4.30834
## 4 4.4596000 1.430970 3.55802 -11.40820 0.961035 -0.1869930 10.67490
## 5 -7.9391000 -3.556150 -2.03019 -0.64403 -6.336980 12.7351000 2.46297
## 6 0.0759437 3.128770 2.74974 -8.75257 -3.094980 -6.5699700 4.72646
## PC195 PC196 PC197 PC198 PC199 PC200 PC201
## 1 0.910948 -8.042340 5.23727 8.172020 1.74364 7.4632400 10.90750
## 2 -5.426750 0.130371 7.72826 -6.550310 -11.48980 -13.9436000 -8.64064
## 3 6.408280 4.563530 -4.61863 -6.578520 5.92747 0.0228253 -10.26530
## 4 1.673040 5.761970 -4.43693 0.321186 1.10595 -4.3009700 8.66860
## 5 1.472540 -5.483900 -6.46014 5.276940 -3.83060 -4.6839400 1.42336
## 6 3.555810 6.871120 -4.15241 -8.280110 1.35386 4.7200800 -9.43587
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## 1 -2.2669100 3.741520 -6.90213 1.521190 0.548047 1.04816 7.438350
## 2 1.6462600 9.763130 -2.57959 -2.844000 -1.215910 -2.81616 -3.145650
## 3 -0.0215309 -6.618470 6.54731 9.394450 -3.523440 -10.66750 6.256250
## 4 -6.2581700 -0.318148 -5.49795 -6.739860 8.748370 5.30643 -0.278222
## 5 -1.1527600 4.195740 3.75332 8.709440 0.724445 1.80931 -4.247090
## 6 -1.9675900 -2.659990 2.69489 -0.196236 -1.345850 -1.33835 -3.777190
## PC209 PC210 PC211 PC212 PC213 PC214 PC215
## 1 1.079830 0.804993 5.95026 -1.61042 3.547410 -0.497208 -2.09902
## 2 -3.828520 -3.319220 -3.68087 3.30502 0.411716 1.725200 -9.77083
## 3 3.878940 6.145040 6.15633 4.76610 2.528460 1.723470 11.92680
## 4 -3.133370 -0.159137 -1.99040 1.98730 -3.919200 -1.113520 -12.08330
## 5 -10.947600 2.338610 -11.36970 1.54996 -2.101040 2.795130 6.68741
## 6 0.730206 1.055790 4.48507 -3.09285 -3.694160 2.946190 -3.76150
## PC216 PC217 PC218 PC219 PC220 PC221 PC222
## 1 -1.93799 2.137280 1.49136 -6.35979 -1.999650 6.460560 -10.585700
## 2 10.25870 -0.681566 -8.56728 -2.77335 0.664778 7.269180 -5.625950
## 3 -1.91891 -4.036690 -5.57953 4.47943 -3.644230 -1.642510 0.177161
## 4 0.40950 -2.822450 1.48543 -6.64756 3.153860 -0.323252 1.453790
## 5 -11.59720 11.862400 5.37617 6.33545 -6.907660 -4.249690 -3.328230
## 6 -1.94337 -4.715090 -1.19449 3.98731 7.462940 -8.265040 7.105690
## PC223 PC224 PC225 PC226 PC227 PC228 PC229
## 1 6.26030 5.75470 -6.84284 11.715700 -5.6183400 0.2430480 -0.704013
## 2 17.91000 -7.41692 23.08070 0.976779 -5.1448300 -2.3510100 -0.273485
## 3 -14.50860 6.04174 -3.54294 1.507420 2.9864500 -0.8730820 4.071330
## 4 -2.47360 -5.11154 -2.27466 -2.801570 -2.3035100 -0.0670175 -9.115460
## 5 -1.17638 2.57996 -9.42025 -1.487250 6.2886600 3.8361400 0.896267
## 6 -3.81264 -3.02251 -3.97208 -11.323300 0.0969562 5.2164200 -1.921160
## PC230 PC231 PC232 PC233 PC234 PC235 PC236 PC237
## 1 -6.11467 -2.251420 6.120510 -2.641430 1.507590 -4.00376 -6.11933 -0.933163
## 2 -4.48263 1.855160 5.160780 5.599340 -4.713290 6.16179 -2.42093 -2.878430
## 3 7.98041 -0.923137 -0.136223 0.705681 -6.179890 -10.17500 -1.47485 3.840300
## 4 0.81340 -1.179330 0.886945 -2.777520 4.520280 -1.01872 7.61043 13.094500
## 5 4.38866 3.506720 -8.013210 -2.897140 -1.386990 1.37174 6.54433 3.476010
## 6 -2.58047 -1.612530 6.915120 -4.707690 -0.449379 5.55076 10.31380 0.237316
## PC238 PC239 PC240 PC241 PC242 PC243 PC244
## 1 21.10460 -15.26040 2.19886 -0.0960591 -3.26182 -18.25860 -4.793180
## 2 -14.65680 11.68660 -2.92870 -6.7929100 -3.74096 -3.30562 0.578432
## 3 -12.73490 1.53312 8.05155 1.6704700 -9.82794 -3.79779 -5.898550
## 4 6.76285 6.71659 -2.03321 7.6123200 9.25779 6.91597 -10.802400
## 5 -17.45570 5.53179 1.85606 0.4529880 3.55132 21.33230 -6.126410
## 6 -2.81911 6.30187 -3.38643 11.3769000 26.88920 5.01527 6.718160
## PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 9.825920 0.368498 -10.421100 -3.01939 4.934450 5.364350 3.21276
## 2 0.249513 -3.860660 -6.924750 -7.77055 6.010400 1.173970 9.52594
## 3 -1.940680 -1.843600 0.517222 11.53290 12.955700 8.033140 23.63580
## 4 13.227400 -5.330850 -1.493860 7.58503 -3.396850 8.558150 14.18900
## 5 -12.008000 2.958860 3.197090 7.69150 0.490028 5.073940 4.34951
## 6 -14.440200 -1.294670 0.980852 3.58781 -12.775300 -0.912472 -22.90500
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 18.37280 6.08839 -5.178480 -1.77009 -1.49813 0.215559 13.878100
## 2 -1.37248 1.89693 -18.117400 3.58664 -9.25055 -17.999100 9.275970
## 3 -5.39199 -10.17820 -9.378100 -11.21190 4.04999 -5.659700 7.364760
## 4 13.28860 0.77435 -0.941677 -10.06840 3.86081 4.607560 6.790750
## 5 -3.71363 -3.86821 -8.100050 -7.65747 -4.77171 6.838140 0.779761
## 6 5.60337 10.07590 -14.518000 1.70536 -8.48897 6.001850 -2.653630
## PC259 PC260 PC261 PC262 PC263 PC264 PC265 PC266
## 1 1.366520 -1.67545 12.12620 -2.73072 7.122350 5.61174 -0.369992 -3.337640
## 2 3.253030 -0.52109 11.01710 1.41546 -1.347250 -4.68588 -5.470220 -0.847408
## 3 0.240385 7.00778 3.52365 -1.66673 -1.377640 -7.34325 -6.123390 -7.495310
## 4 2.596990 -1.69915 11.56710 5.40263 8.225790 5.23127 2.497470 -1.212910
## 5 1.670470 -2.51541 3.88925 2.05927 0.447871 -2.91688 2.134180 2.218940
## 6 1.779740 -2.98839 -6.96636 1.62252 -5.128720 2.32212 4.330380 6.185210
## PC267 PC268 PC269 PC270 PC271 PC272 PC273 PC274
## 1 0.613616 5.68206 2.3816700 1.196100 -8.22209 -4.639850 1.34193 7.72916
## 2 -5.298480 5.02221 0.3060520 0.410279 -1.12968 -3.351980 -1.24930 -1.67022
## 3 0.169219 6.72680 1.2057500 -3.196180 2.88484 -6.311270 -1.33891 -3.01090
## 4 1.645960 2.84261 -0.0118685 -0.859617 -3.34874 0.263232 2.95285 2.27743
## 5 0.729482 3.56651 0.1658270 -2.992770 1.79006 0.343371 -3.37505 -3.23617
## 6 2.918850 -10.56080 -5.3668600 -3.750360 7.57666 6.797100 -1.14310 -7.39161
## PC275 PC276 PC277 PC278 PC279 PC280 PC281
## 1 -1.567000 -0.541626 -2.3688300 2.31419000 4.288540 -0.221416 2.291920
## 2 0.699928 -2.999690 -0.5837480 -2.83002000 1.545390 1.294580 1.069000
## 3 0.200851 -0.670598 -0.0464095 0.00465983 1.196600 -0.930999 3.447720
## 4 -2.834220 0.707821 3.9734100 3.71178000 2.108120 -0.984043 0.493455
## 5 1.411270 3.746540 0.4366800 1.48215000 -0.988513 -2.537130 1.234670
## 6 -3.469010 5.891030 3.8895800 -2.91172000 2.393080 -0.382463 -2.931400
## PC282 PC283 PC284 PC285 PC286 PC287 PC288
## 1 1.003570 -1.175530 -1.824410 5.13454 2.423280 -3.702960 -1.284480
## 2 2.713420 0.303270 -0.383434 -1.57956 1.351580 -0.952356 -0.811361
## 3 3.383290 -1.118160 -2.668420 -1.04655 0.701324 0.058565 -0.758462
## 4 1.375720 -1.235870 -1.625840 5.27569 0.522691 -4.968060 0.197759
## 5 2.055850 -0.958846 -1.070320 -1.32984 -0.462819 -1.993750 1.911980
## 6 -0.886907 -0.966520 4.639050 -5.32699 2.417190 1.671340 2.014800
## PC289 PC290 PC291 PC292 PC293 PC294 PC295
## 1 1.836550 -1.957090 0.530749 0.508176 0.636557 -1.4038200 -0.248397
## 2 -0.594592 -1.720770 -0.268210 0.288945 2.037530 -2.3017200 2.315090
## 3 -0.708405 2.430020 -2.235310 -0.810539 -0.489244 -0.5221430 1.293830
## 4 1.670870 -1.281180 1.018950 -2.859150 -1.072850 -0.0479585 -3.677910
## 5 0.898994 -0.508513 1.531620 -1.622450 -0.550698 2.2110100 -1.993760
## 6 2.775520 -0.158644 1.099160 -1.171090 -3.326430 -0.5605280 2.982730
## PC296 PC297 PC298 PC299 PC300 PC301 PC302
## 1 -0.0467915 -0.654160 -2.0625000 0.240649 -0.588341 0.295810 -0.0316656
## 2 1.8139700 0.779074 1.5598200 -1.556570 -0.529166 -0.825035 -0.3991970
## 3 0.2042640 -1.983090 0.8547860 0.188940 0.952455 -1.293090 0.4155780
## 4 -1.0536000 1.223620 -0.1958010 -0.205820 -1.186100 0.895926 0.9182580
## 5 -0.5999970 0.708073 -0.0893639 0.430328 0.908237 0.879889 -1.3128300
## 6 0.5782380 -1.207890 0.8823800 -0.102105 1.834670 1.117060 -0.7175880
## PC303 PC304 PC305 PC306 PC307 PC308 PC309
## 1 -0.1808410 -0.0681759 0.857804 0.903926 0.147160 -0.205308 -0.6859770
## 2 2.6869200 0.8234650 -0.634279 -0.204301 0.413023 0.684150 -0.7299410
## 3 -1.5163100 -0.6338990 -0.109502 -0.826637 -0.450889 0.641502 0.6849520
## 4 -0.0282513 -0.9098190 1.934460 0.766758 -1.479430 -0.584326 -0.7959710
## 5 0.7944070 -0.7262600 -0.943115 -1.985560 0.300072 -1.724090 0.0472941
## 6 0.7691120 2.1418600 -0.130876 -1.041520 0.215288 -1.160540 0.4151860
## PC310 PC311 PC312 PC313 PC314 PC315 PC316
## 1 -0.571153 -1.214920 -1.229820 -0.739570 -0.612735 -0.3053200 0.2118910
## 2 -0.394852 1.681880 -1.950200 0.699761 -0.967610 1.7095700 -0.0358667
## 3 -0.625501 -0.696387 -2.302250 0.224089 0.215718 -0.5012200 -1.0750800
## 4 0.374613 -2.571250 -0.825077 -0.727066 -0.606653 -1.7053100 -2.0381000
## 5 -0.173505 -0.443516 -0.093964 -0.639347 0.511583 0.4607520 -0.3114920
## 6 0.134549 0.454952 -0.129294 0.774373 -1.011360 -0.0309106 -0.1467060
## PC317 PC318 PC319 PC320 PC321 PC322 PC323
## 1 -1.2652000 -0.00506833 1.342590 0.8060470 -0.132721 -1.334280 -0.5696410
## 2 -0.4431830 0.71761600 0.373968 -0.3651000 1.493980 1.019030 0.1125780
## 3 0.9750040 -0.14353200 -0.372653 -0.3065110 1.438280 -0.585709 -0.0957389
## 4 -0.4231130 0.23121100 0.394054 0.4501970 1.270710 -0.501647 -0.2829260
## 5 -0.0134821 -0.05963560 0.595172 -0.6823740 0.412990 0.442514 0.1327450
## 6 -0.6708910 0.11538300 -1.251810 -0.0822363 -1.569170 -0.440656 1.0226600
## PC324 PC325 PC326 PC327 PC328 PC329 PC330
## 1 0.438355 0.0776800 0.0495142 -0.0506694 -0.0149687 -0.0430816 0.1677090
## 2 0.282173 0.2536410 0.4902050 -0.3234080 -0.1826820 -0.3846510 -0.1355330
## 3 0.524582 0.1188800 0.0529997 0.2741200 -0.3504540 -0.1118490 -0.2768630
## 4 -0.380963 0.4693500 -0.0589738 0.5795380 0.3944940 -0.0561675 0.2645650
## 5 0.618017 -0.2050030 -0.2749630 0.0557564 -0.1506440 -0.1818220 0.3267600
## 6 -0.152357 -0.0620448 0.1298630 -0.3807810 -0.1834630 0.1412770 0.0468199
## PC331 Individual Pop_City Country Latitude Longitude Continent Year
## 1 1.57436e-06 282 Barcelona Spain 41.3851 2.1734 Europe 2018
## 2 1.57436e-06 283 Barcelona Spain 41.3851 2.1734 Europe 2018
## 3 1.57436e-06 280 Barcelona Spain 41.3851 2.1734 Europe 2018
## 4 1.57436e-06 279 Barcelona Spain 41.3851 2.1734 Europe 2018
## 5 1.57436e-06 286 Barcelona Spain 41.3851 2.1734 Europe 2018
## 6 1.57436e-06 287 Barcelona Spain 41.3851 2.1734 Europe 2018
## Region Subregion order order2 orderold
## 1 Southern Europe West Europe 16 8 8
## 2 Southern Europe West Europe 16 8 8
## 3 Southern Europe West Europe 16 8 8
## 4 Southern Europe West Europe 16 8 8
## 5 Southern Europe West Europe 16 8 8
## 6 Southern Europe West Europe 16 8 8
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US_2.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US_2.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_US_2.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation
## 1 Berlin, NJ USA 39.79081 -74.929100 Americas BER
## 2 Palm Beach USA 26.70560 -80.036400 Americas PAL
## 3 Recife, PE Brazil -8.05882 -34.878100 Americas REC
## 4 Gravatai Brazil -29.93760 -50.990700 Americas GRV
## 5 Penafiel Penafiel, Portugal 41.18555 -8.329371 Europe POP
## 6 Loule Loule, Portugal 37.09084 -8.092465 Europe POL
## Year Region Subregion order order2 orderold
## 1 2018 North America 1 NA 75
## 2 2018 North America 3 NA 77
## 3 2017 South America 7 NA 81
## 4 2018 South America 8 NA 82
## 5 2017 Southern Europe West Europe 11 3 3
## 6 2017 Southern Europe West Europe 12 4 4
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 39 Levels: BAR BEN BER CAM CHA GEL GRV HAI HAN HOC HUN INJ INW JAF KAC ... YUN
## [1] 39
Check the regions
## [1] "North America" "South America" "Southern Europe" "East Asia"
## [5] "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BAR 25.5499 2.001850 32.4190 7.67455 -25.4593 -4.67251 13.7777
## 2 BAR 25.4107 0.354687 31.4114 5.91305 -20.6103 -7.52188 21.6435
## 3 BAR 21.9004 -1.111830 31.3627 5.46695 -22.3339 -6.51403 19.6255
## 4 BAR 22.5679 1.277120 33.8145 6.21831 -25.4711 -6.89546 20.8575
## 5 BAR 24.4462 -1.403420 29.1115 6.30322 -19.0357 -5.08700 14.6359
## 6 BAR 25.3203 0.761056 34.6690 7.37388 -25.1849 -6.13724 17.6580
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 3.40690 -0.733271 -9.67442 -15.9058 18.0216 6.32654 5.98767 3.41399
## 2 5.84533 -2.214230 -12.85460 -19.9574 17.3894 6.11969 7.98925 1.07950
## 3 9.17413 -2.425720 -9.48653 -17.5797 18.6901 9.58027 5.83143 2.08171
## 4 8.02944 -3.644450 -12.77820 -19.6965 21.8319 10.70570 6.00047 5.04656
## 5 2.53529 -1.993170 -7.14319 -17.3004 14.1459 7.36393 5.16066 4.08381
## 6 8.71096 -1.058250 -12.28910 -19.4578 17.8304 7.96268 7.49455 2.73509
## PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23
## 1 -7.16208 7.71709 4.41331 -2.754290 1.854080 2.129810 0.884887 1.293300
## 2 -6.41903 7.30044 5.32912 3.931020 -1.013790 -0.234323 -8.631420 -2.862340
## 3 -3.89952 11.34300 4.84956 2.312330 -3.679250 -2.301880 -6.299350 -0.498285
## 4 -4.11928 10.77560 8.72978 1.012840 -0.646801 -2.926890 -7.660640 -0.725622
## 5 -3.15545 6.72153 4.27435 0.619417 -0.920164 -3.943620 -5.897880 -3.275340
## 6 -9.46860 11.47150 4.60506 -2.486380 0.665044 3.262860 -2.738350 2.407820
## PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31
## 1 -2.23555 3.980410 0.479427 -0.130636 0.0865897 -2.361130 0.557199 0.316978
## 2 2.34082 2.598980 0.134286 2.492170 4.3267900 -4.631260 1.770090 -0.939229
## 3 5.09459 0.749207 0.836163 -1.730430 -0.4442760 -2.515980 -0.942403 4.041180
## 4 3.40329 2.569730 4.961780 -4.176320 -0.1042140 -0.131276 -0.999407 -1.027160
## 5 1.23383 1.401020 2.345880 4.706740 3.5340900 -3.955960 -2.782230 2.525790
## 6 4.66316 4.781560 0.827205 -0.737351 -1.4653200 -3.554900 3.813600 -0.341777
## PC32 PC33 PC34 PC35 PC36 PC37 PC38
## 1 -0.4444750 -3.15818 -2.83666 4.82432000 -0.997399 1.21471 -5.746860
## 2 3.5647700 1.54800 -3.88493 9.35334000 -1.017370 -6.12165 6.997830
## 3 -2.4151800 -1.41287 4.46832 -7.77748000 -1.808960 8.18154 -0.910979
## 4 0.0760186 3.24226 2.29609 -2.64337000 4.722200 -1.96688 -1.398860
## 5 -1.2545300 2.59171 -4.77755 0.00356112 5.917210 -4.51056 -3.554590
## 6 -1.9315000 2.22768 -1.22077 -2.63133000 0.270552 6.88953 -4.599620
## PC39 PC40 PC41 PC42 PC43 PC44 PC45
## 1 -0.0614424 -10.545000 7.883870 6.857250 -3.2807000 2.47440 16.497700
## 2 -3.3119900 0.578622 2.470320 -6.460780 -4.1464900 2.14022 -7.894510
## 3 4.8478600 1.088610 -3.357980 -0.298669 0.0652313 -1.38465 -0.620771
## 4 -0.8465660 9.940250 -8.551070 -4.867380 5.7667200 -7.29192 -7.406230
## 5 -3.6516100 -4.192260 10.120500 -1.745520 -0.0524046 4.05158 12.631500
## 6 0.9997290 -2.663760 0.210486 3.010410 -3.9270600 3.11524 3.306900
## PC46 PC47 PC48 PC49 PC50 PC51 PC52 PC53
## 1 8.22976 6.59451 8.11986 5.01540 -4.40843 -0.740971 -15.62990 9.878280
## 2 3.88213 -6.63139 -3.31186 -9.77056 8.51794 -2.449940 7.63387 4.434340
## 3 -8.34699 -11.30220 -7.68358 12.71820 -20.46040 -2.483860 -21.09400 -21.488000
## 4 -7.00947 6.08540 -7.22786 -7.66642 11.90510 2.748040 16.59100 -10.143900
## 5 5.70632 20.08740 3.50690 -4.87685 18.25830 5.788220 13.66970 16.963500
## 6 5.80395 -7.31314 2.43073 11.88560 -19.00440 0.178450 -13.99530 0.764454
## PC54 PC55 PC56 PC57 PC58 PC59 PC60
## 1 -5.57121 -15.027800 -13.90860 -4.094810 4.21689 -10.382200 -4.29901
## 2 3.36386 -4.893800 2.43052 4.229120 -5.20076 1.254800 -8.31485
## 3 3.95256 6.789440 1.51764 -5.220900 8.97183 0.382508 7.51565
## 4 5.02958 18.559000 15.71960 3.405140 -6.72767 7.536590 9.78866
## 5 -15.53990 0.428366 -12.44660 -2.525550 -6.14423 -4.232770 6.26329
## 6 2.29198 -10.959400 -3.82909 0.495758 5.25124 3.543490 -6.27881
## PC61 PC62 PC63 PC64 PC65 PC66 PC67
## 1 -1.09797000 -4.804900 -0.156622 0.683491 -2.260210 4.234910 -3.649060
## 2 0.00490617 10.205300 -0.668617 -8.417400 -1.931710 3.357220 -5.219050
## 3 1.20415000 -0.623453 0.974949 10.128200 4.281520 -10.625900 9.376110
## 4 3.38923000 -4.474070 -3.546520 -1.106340 -0.252152 -3.496220 1.515490
## 5 -1.17010000 -16.055000 0.699717 -2.160300 2.908200 2.509440 -0.379699
## 6 0.01844000 5.739770 -9.486850 2.533710 -10.111700 -0.978215 -6.956280
## PC68 PC69 PC70 PC71 PC72 PC73 PC74
## 1 -1.574830 -5.243250 -1.403580 -12.63960 1.356470 -8.12799 0.859371
## 2 -0.278632 0.894293 -3.828950 5.62895 -1.293390 1.69540 4.779710
## 3 8.166690 -4.691220 3.184360 -9.13291 -0.734180 4.67734 2.345690
## 4 0.836239 8.973690 -6.029360 5.15928 -2.319940 4.61097 -1.591360
## 5 -2.314620 -3.092170 10.263800 3.76500 -4.196360 -18.39850 -3.893150
## 6 8.467670 -3.522660 0.544784 -13.40720 -0.114845 1.46456 12.050100
## PC75 PC76 PC77 PC78 PC79 PC80 PC81 PC82
## 1 6.572720 -1.970600 -6.48878 2.12883 -8.44218 2.83437 10.09980 -6.658690
## 2 0.289025 -10.742600 9.31079 2.62550 -5.67173 0.65423 -9.82558 6.597310
## 3 -4.700500 0.240791 -9.46227 -1.55213 2.30165 5.40084 1.85246 -0.882662
## 4 -3.918090 4.068350 -4.40486 -2.23271 12.17960 -2.56363 -9.31355 0.260666
## 5 4.600820 9.808770 4.18361 1.28728 -7.80091 -5.49614 10.93040 -3.438530
## 6 -5.969230 -10.239100 -4.05508 2.52897 -6.26424 -6.24197 2.27204 -0.564218
## PC83 PC84 PC85 PC86 PC87 PC88 PC89 PC90
## 1 -2.86887 14.001600 0.957331 -1.87941 4.64860 0.657698 0.399883 2.022860
## 2 -2.41667 0.050327 2.596270 1.67471 -2.48395 -1.396320 -4.257030 -2.291260
## 3 2.37558 -2.717030 0.846448 3.65477 -3.07108 -2.243730 3.539840 -0.099632
## 4 3.51192 -10.564200 -0.569494 -2.68905 -2.09208 5.139930 6.494860 5.454050
## 5 -2.26074 13.791700 3.296180 -5.23505 8.30755 -4.069930 -6.242270 -4.136490
## 6 -2.48147 10.155800 0.622089 4.60028 -1.74139 5.396000 2.946920 -2.739450
## PC91 PC92 PC93 PC94 PC95 PC96 PC97
## 1 -7.621980 0.347251 -7.679630 -4.570580 6.94600 2.55569 -6.281100
## 2 6.124300 -6.053970 5.249970 9.215560 1.07701 -3.67124 -7.650710
## 3 -0.821424 6.222880 -5.285170 -1.756490 -2.14933 -1.85051 0.284107
## 4 0.316467 -1.141910 -3.056630 -1.073980 -10.11590 3.00487 5.355180
## 5 3.806260 0.314570 0.591955 -9.671800 14.58320 2.33464 -1.780890
## 6 -3.593550 -4.794020 -7.208490 -0.857269 1.25426 -2.20382 -2.009490
## PC98 PC99 PC100 PC101 PC102 PC103 PC104
## 1 -10.40770 -2.110000 -9.424470 -1.839250 1.535390 -5.510170 -9.217160
## 2 1.88745 -0.834114 -2.570220 0.206347 4.293290 0.302222 1.592700
## 3 1.58134 -1.053580 6.435840 5.676030 -3.411820 -0.304258 -0.429984
## 4 8.50590 1.367650 0.608194 1.623170 -1.206740 7.056930 3.993230
## 5 -6.29561 -4.134710 -1.820040 -6.111940 0.997718 0.208238 4.110780
## 6 -2.58204 3.409730 0.152236 1.872320 -0.201032 -4.711020 -6.356940
## PC105 PC106 PC107 PC108 PC109 PC110 PC111 PC112
## 1 6.079390 1.64507 5.043130 -1.280310 -5.12226 2.400870 -2.93166 -2.596930
## 2 -6.858940 4.49297 10.819200 1.935290 1.18597 -3.354460 -3.12687 5.176590
## 3 1.402610 5.83262 -8.318620 -0.867838 4.82371 -2.783900 0.98931 -0.873792
## 4 0.540892 -8.13902 2.633320 2.897030 -2.00770 4.063370 5.14743 -4.799910
## 5 -2.495470 -2.72289 -0.883392 -1.856300 -3.07880 -1.573520 -5.40047 3.153300
## 6 2.410270 3.76763 0.650305 5.733720 -1.65211 -0.793212 -1.72445 -6.884940
## PC113 PC114 PC115 PC116 PC117 PC118 PC119
## 1 2.2578800 3.88597 2.867630 0.539158 -0.845930 -5.403780 2.784410
## 2 -4.3606400 -0.92686 -7.133760 -4.580270 -3.141370 -2.555780 -6.057470
## 3 -0.0760097 4.41340 2.063490 4.869730 1.861820 5.288130 -0.360741
## 4 -2.7829100 -2.09383 1.375930 -2.798820 -0.927025 -0.118288 8.700330
## 5 7.6067200 3.72398 2.286540 3.436420 1.227490 0.349492 0.649339
## 6 -0.9041910 -2.10562 0.943164 -0.286257 -4.915870 -1.528040 2.788780
## PC120 PC121 PC122 PC123 PC124 PC125 PC126 PC127
## 1 -4.91608 2.93566 -8.418820 3.80704 -1.52167 -5.461930 2.234070 -2.08119
## 2 -5.82860 2.04339 7.237420 1.77367 5.72487 4.807260 0.455513 -4.55348
## 3 1.93994 -6.71502 5.527480 -1.24756 -6.20322 -2.739320 -1.555350 2.43503
## 4 -4.21144 -2.04098 -5.836880 -6.76454 -2.89093 0.808653 0.486359 3.37248
## 5 6.23956 -3.65780 9.455560 1.56276 2.35529 -6.507030 -3.885540 -1.85520
## 6 -6.83929 3.39949 0.274695 -3.51281 -1.19905 5.340580 -0.898033 -5.43983
## PC128 PC129 PC130 PC131 PC132 PC133 PC134
## 1 -2.68327 -2.549630 9.401090 0.683652 -1.271010 -11.416600 -1.922070
## 2 9.75043 -0.917970 -0.372633 1.562830 -1.881060 4.285940 4.032880
## 3 2.38452 -5.374380 -3.196170 -2.384930 2.686380 4.526360 -2.830820
## 4 -11.87540 4.469200 -7.035340 -3.212530 -3.742870 8.108840 2.652020
## 5 4.62774 0.716657 1.085520 3.138260 0.723081 -5.909650 0.225585
## 6 -2.55966 -3.140090 3.077420 -0.500853 3.364680 0.357091 5.086970
## PC135 PC136 PC137 PC138 PC139 PC140 PC141 PC142
## 1 -4.87730 2.26513 0.262335 4.62478 5.99020 4.339540 0.189673 -3.939540
## 2 8.17511 1.93598 5.514280 1.03692 -1.33778 -6.677280 1.741950 0.921766
## 3 4.24522 -5.15313 0.201977 -6.31506 -1.37553 1.129400 -5.531570 3.147510
## 4 -5.15336 2.21695 -2.338480 -5.61432 -8.98571 -1.644590 4.369480 -1.193880
## 5 -2.41895 3.90826 3.841930 4.90093 14.33500 0.260579 -5.722460 7.555040
## 6 2.18485 -3.69796 0.569709 -1.83564 -6.22962 5.775860 -4.806010 -6.111940
## PC143 PC144 PC145 PC146 PC147 PC148 PC149 PC150
## 1 -0.699479 -2.504360 -2.254420 -3.395470 4.77453 -3.15509 -1.713990 3.44239
## 2 -2.513140 -0.844755 -3.618760 -3.942080 5.65364 -5.38484 -3.114800 -4.41353
## 3 5.840610 -1.219760 -8.340430 5.265660 3.93128 3.53840 5.863480 7.40169
## 4 2.002100 -1.773520 5.916870 -2.958120 -11.15000 -1.58413 -5.806020 -5.48468
## 5 -5.756550 4.389890 4.857280 6.174350 -4.38367 -1.28658 0.225397 -2.82186
## 6 4.348170 0.796501 -0.205014 -0.827709 -4.34729 -5.21046 -1.004390 4.42634
## PC151 PC152 PC153 PC154 PC155 PC156 PC157 PC158
## 1 2.118480 4.28212 -0.317366 -1.457550 1.15876 -1.83845 0.677561 2.787140
## 2 -4.782600 4.04689 -0.657632 3.414950 3.59248 1.42896 0.855985 0.960586
## 3 -0.398366 -1.25384 -2.677050 0.337193 -4.41156 -3.83077 -0.899710 0.143709
## 4 -4.051750 3.49544 9.373580 -3.702670 2.50726 -3.59417 -3.470970 -2.842710
## 5 0.403745 1.09411 -5.127360 -6.036350 -5.27839 7.88031 0.981945 -4.104420
## 6 2.459710 1.82108 3.974530 -0.626101 2.53644 -6.06100 -0.463329 2.182540
## PC159 PC160 PC161 PC162 PC163 PC164 PC165 PC166
## 1 2.61953 2.776970 6.499190 -3.17932 -1.26286 -5.81216 -3.177150 -1.86669
## 2 -2.04183 -3.528650 -5.698820 -4.64500 -4.59376 -5.62974 2.897230 -1.27826
## 3 -4.13686 0.359589 2.721880 2.89826 2.82899 7.24960 0.378805 -6.48326
## 4 2.79463 -1.328860 -0.184743 1.14260 1.50184 4.79097 -4.914690 8.68172
## 5 1.95661 -2.774440 1.031620 -0.31542 5.01982 -1.95284 -2.344740 8.01955
## 6 -2.27209 -2.122670 -2.977810 -1.96589 3.56997 3.67382 7.778470 -1.85858
## PC167 PC168 PC169 PC170 PC171 PC172 PC173
## 1 -0.0803253 2.848890 -2.162710 4.967950 -3.9800100 -4.861580 -1.59772
## 2 -3.7661300 -3.614570 -4.534350 -0.843756 -5.4733700 7.792640 -1.93847
## 3 -0.6931030 -1.764910 -1.017960 -3.551750 -2.6722700 -0.845494 -4.76455
## 4 3.4457900 -4.300140 5.789650 -0.429008 0.0647389 -7.637510 -1.81966
## 5 -0.7328930 0.499075 -0.141274 0.987371 14.8210000 5.370970 1.26637
## 6 4.7712000 -0.928789 -2.991940 -4.036390 -5.7398300 -5.186370 -3.25269
## PC174 PC175 PC176 PC177 PC178 PC179 PC180
## 1 9.62203 3.794880 7.310290 1.446480 3.169850 2.729350 -5.314270
## 2 -2.25811 14.894800 -5.213880 -4.870530 -2.762820 4.817930 3.516820
## 3 5.99955 2.589690 -1.611290 7.348660 -0.456571 2.428020 4.821800
## 4 -7.29503 -9.533150 0.415911 -8.096380 2.315460 -2.030240 -4.813670
## 5 -4.08710 -4.051960 2.816590 0.522548 5.103060 0.550644 -0.870163
## 6 -4.15292 -0.687135 -0.908193 -0.298223 2.353910 2.230670 3.742080
## PC181 PC182 PC183 PC184 PC185 PC186 PC187
## 1 3.4773000 4.92234 -0.658278 4.7902500 0.790756 5.81346 -1.153170
## 2 0.0253924 -6.73912 -0.494408 1.9940900 -4.615020 6.09937 -0.101715
## 3 -5.7484900 -2.87637 2.330840 -2.9863900 2.284260 5.20689 -2.454970
## 4 10.4513000 7.54592 3.610410 6.0622700 3.310300 -6.91275 1.656040
## 5 -1.4731600 1.22017 -6.339070 -7.6392100 -4.212780 1.59322 0.115176
## 6 -1.2248000 -2.79284 -2.296190 0.0286201 -3.590650 3.76164 1.137250
## PC188 PC189 PC190 PC191 PC192 PC193 PC194
## 1 4.5866100 0.486935 3.98583 1.18599 -0.239188 -7.5906200 -1.54424
## 2 -10.3407000 -2.671190 4.26308 -3.43792 2.857570 1.0492400 -3.08940
## 3 0.1671920 -6.557360 -9.36124 10.18680 1.382550 0.0706454 -4.30834
## 4 4.4596000 1.430970 3.55802 -11.40820 0.961035 -0.1869930 10.67490
## 5 -7.9391000 -3.556150 -2.03019 -0.64403 -6.336980 12.7351000 2.46297
## 6 0.0759437 3.128770 2.74974 -8.75257 -3.094980 -6.5699700 4.72646
## PC195 PC196 PC197 PC198 PC199 PC200 PC201
## 1 0.910948 -8.042340 5.23727 8.172020 1.74364 7.4632400 10.90750
## 2 -5.426750 0.130371 7.72826 -6.550310 -11.48980 -13.9436000 -8.64064
## 3 6.408280 4.563530 -4.61863 -6.578520 5.92747 0.0228253 -10.26530
## 4 1.673040 5.761970 -4.43693 0.321186 1.10595 -4.3009700 8.66860
## 5 1.472540 -5.483900 -6.46014 5.276940 -3.83060 -4.6839400 1.42336
## 6 3.555810 6.871120 -4.15241 -8.280110 1.35386 4.7200800 -9.43587
## PC202 PC203 PC204 PC205 PC206 PC207 PC208
## 1 -2.2669100 3.741520 -6.90213 1.521190 0.548047 1.04816 7.438350
## 2 1.6462600 9.763130 -2.57959 -2.844000 -1.215910 -2.81616 -3.145650
## 3 -0.0215309 -6.618470 6.54731 9.394450 -3.523440 -10.66750 6.256250
## 4 -6.2581700 -0.318148 -5.49795 -6.739860 8.748370 5.30643 -0.278222
## 5 -1.1527600 4.195740 3.75332 8.709440 0.724445 1.80931 -4.247090
## 6 -1.9675900 -2.659990 2.69489 -0.196236 -1.345850 -1.33835 -3.777190
## PC209 PC210 PC211 PC212 PC213 PC214 PC215
## 1 1.079830 0.804993 5.95026 -1.61042 3.547410 -0.497208 -2.09902
## 2 -3.828520 -3.319220 -3.68087 3.30502 0.411716 1.725200 -9.77083
## 3 3.878940 6.145040 6.15633 4.76610 2.528460 1.723470 11.92680
## 4 -3.133370 -0.159137 -1.99040 1.98730 -3.919200 -1.113520 -12.08330
## 5 -10.947600 2.338610 -11.36970 1.54996 -2.101040 2.795130 6.68741
## 6 0.730206 1.055790 4.48507 -3.09285 -3.694160 2.946190 -3.76150
## PC216 PC217 PC218 PC219 PC220 PC221 PC222
## 1 -1.93799 2.137280 1.49136 -6.35979 -1.999650 6.460560 -10.585700
## 2 10.25870 -0.681566 -8.56728 -2.77335 0.664778 7.269180 -5.625950
## 3 -1.91891 -4.036690 -5.57953 4.47943 -3.644230 -1.642510 0.177161
## 4 0.40950 -2.822450 1.48543 -6.64756 3.153860 -0.323252 1.453790
## 5 -11.59720 11.862400 5.37617 6.33545 -6.907660 -4.249690 -3.328230
## 6 -1.94337 -4.715090 -1.19449 3.98731 7.462940 -8.265040 7.105690
## PC223 PC224 PC225 PC226 PC227 PC228 PC229
## 1 6.26030 5.75470 -6.84284 11.715700 -5.6183400 0.2430480 -0.704013
## 2 17.91000 -7.41692 23.08070 0.976779 -5.1448300 -2.3510100 -0.273485
## 3 -14.50860 6.04174 -3.54294 1.507420 2.9864500 -0.8730820 4.071330
## 4 -2.47360 -5.11154 -2.27466 -2.801570 -2.3035100 -0.0670175 -9.115460
## 5 -1.17638 2.57996 -9.42025 -1.487250 6.2886600 3.8361400 0.896267
## 6 -3.81264 -3.02251 -3.97208 -11.323300 0.0969562 5.2164200 -1.921160
## PC230 PC231 PC232 PC233 PC234 PC235 PC236 PC237
## 1 -6.11467 -2.251420 6.120510 -2.641430 1.507590 -4.00376 -6.11933 -0.933163
## 2 -4.48263 1.855160 5.160780 5.599340 -4.713290 6.16179 -2.42093 -2.878430
## 3 7.98041 -0.923137 -0.136223 0.705681 -6.179890 -10.17500 -1.47485 3.840300
## 4 0.81340 -1.179330 0.886945 -2.777520 4.520280 -1.01872 7.61043 13.094500
## 5 4.38866 3.506720 -8.013210 -2.897140 -1.386990 1.37174 6.54433 3.476010
## 6 -2.58047 -1.612530 6.915120 -4.707690 -0.449379 5.55076 10.31380 0.237316
## PC238 PC239 PC240 PC241 PC242 PC243 PC244
## 1 21.10460 -15.26040 2.19886 -0.0960591 -3.26182 -18.25860 -4.793180
## 2 -14.65680 11.68660 -2.92870 -6.7929100 -3.74096 -3.30562 0.578432
## 3 -12.73490 1.53312 8.05155 1.6704700 -9.82794 -3.79779 -5.898550
## 4 6.76285 6.71659 -2.03321 7.6123200 9.25779 6.91597 -10.802400
## 5 -17.45570 5.53179 1.85606 0.4529880 3.55132 21.33230 -6.126410
## 6 -2.81911 6.30187 -3.38643 11.3769000 26.88920 5.01527 6.718160
## PC245 PC246 PC247 PC248 PC249 PC250 PC251
## 1 9.825920 0.368498 -10.421100 -3.01939 4.934450 5.364350 3.21276
## 2 0.249513 -3.860660 -6.924750 -7.77055 6.010400 1.173970 9.52594
## 3 -1.940680 -1.843600 0.517222 11.53290 12.955700 8.033140 23.63580
## 4 13.227400 -5.330850 -1.493860 7.58503 -3.396850 8.558150 14.18900
## 5 -12.008000 2.958860 3.197090 7.69150 0.490028 5.073940 4.34951
## 6 -14.440200 -1.294670 0.980852 3.58781 -12.775300 -0.912472 -22.90500
## PC252 PC253 PC254 PC255 PC256 PC257 PC258
## 1 18.37280 6.08839 -5.178480 -1.77009 -1.49813 0.215559 13.878100
## 2 -1.37248 1.89693 -18.117400 3.58664 -9.25055 -17.999100 9.275970
## 3 -5.39199 -10.17820 -9.378100 -11.21190 4.04999 -5.659700 7.364760
## 4 13.28860 0.77435 -0.941677 -10.06840 3.86081 4.607560 6.790750
## 5 -3.71363 -3.86821 -8.100050 -7.65747 -4.77171 6.838140 0.779761
## 6 5.60337 10.07590 -14.518000 1.70536 -8.48897 6.001850 -2.653630
## PC259 PC260 PC261 PC262 PC263 PC264 PC265 PC266
## 1 1.366520 -1.67545 12.12620 -2.73072 7.122350 5.61174 -0.369992 -3.337640
## 2 3.253030 -0.52109 11.01710 1.41546 -1.347250 -4.68588 -5.470220 -0.847408
## 3 0.240385 7.00778 3.52365 -1.66673 -1.377640 -7.34325 -6.123390 -7.495310
## 4 2.596990 -1.69915 11.56710 5.40263 8.225790 5.23127 2.497470 -1.212910
## 5 1.670470 -2.51541 3.88925 2.05927 0.447871 -2.91688 2.134180 2.218940
## 6 1.779740 -2.98839 -6.96636 1.62252 -5.128720 2.32212 4.330380 6.185210
## PC267 PC268 PC269 PC270 PC271 PC272 PC273 PC274
## 1 0.613616 5.68206 2.3816700 1.196100 -8.22209 -4.639850 1.34193 7.72916
## 2 -5.298480 5.02221 0.3060520 0.410279 -1.12968 -3.351980 -1.24930 -1.67022
## 3 0.169219 6.72680 1.2057500 -3.196180 2.88484 -6.311270 -1.33891 -3.01090
## 4 1.645960 2.84261 -0.0118685 -0.859617 -3.34874 0.263232 2.95285 2.27743
## 5 0.729482 3.56651 0.1658270 -2.992770 1.79006 0.343371 -3.37505 -3.23617
## 6 2.918850 -10.56080 -5.3668600 -3.750360 7.57666 6.797100 -1.14310 -7.39161
## PC275 PC276 PC277 PC278 PC279 PC280 PC281
## 1 -1.567000 -0.541626 -2.3688300 2.31419000 4.288540 -0.221416 2.291920
## 2 0.699928 -2.999690 -0.5837480 -2.83002000 1.545390 1.294580 1.069000
## 3 0.200851 -0.670598 -0.0464095 0.00465983 1.196600 -0.930999 3.447720
## 4 -2.834220 0.707821 3.9734100 3.71178000 2.108120 -0.984043 0.493455
## 5 1.411270 3.746540 0.4366800 1.48215000 -0.988513 -2.537130 1.234670
## 6 -3.469010 5.891030 3.8895800 -2.91172000 2.393080 -0.382463 -2.931400
## PC282 PC283 PC284 PC285 PC286 PC287 PC288
## 1 1.003570 -1.175530 -1.824410 5.13454 2.423280 -3.702960 -1.284480
## 2 2.713420 0.303270 -0.383434 -1.57956 1.351580 -0.952356 -0.811361
## 3 3.383290 -1.118160 -2.668420 -1.04655 0.701324 0.058565 -0.758462
## 4 1.375720 -1.235870 -1.625840 5.27569 0.522691 -4.968060 0.197759
## 5 2.055850 -0.958846 -1.070320 -1.32984 -0.462819 -1.993750 1.911980
## 6 -0.886907 -0.966520 4.639050 -5.32699 2.417190 1.671340 2.014800
## PC289 PC290 PC291 PC292 PC293 PC294 PC295
## 1 1.836550 -1.957090 0.530749 0.508176 0.636557 -1.4038200 -0.248397
## 2 -0.594592 -1.720770 -0.268210 0.288945 2.037530 -2.3017200 2.315090
## 3 -0.708405 2.430020 -2.235310 -0.810539 -0.489244 -0.5221430 1.293830
## 4 1.670870 -1.281180 1.018950 -2.859150 -1.072850 -0.0479585 -3.677910
## 5 0.898994 -0.508513 1.531620 -1.622450 -0.550698 2.2110100 -1.993760
## 6 2.775520 -0.158644 1.099160 -1.171090 -3.326430 -0.5605280 2.982730
## PC296 PC297 PC298 PC299 PC300 PC301 PC302
## 1 -0.0467915 -0.654160 -2.0625000 0.240649 -0.588341 0.295810 -0.0316656
## 2 1.8139700 0.779074 1.5598200 -1.556570 -0.529166 -0.825035 -0.3991970
## 3 0.2042640 -1.983090 0.8547860 0.188940 0.952455 -1.293090 0.4155780
## 4 -1.0536000 1.223620 -0.1958010 -0.205820 -1.186100 0.895926 0.9182580
## 5 -0.5999970 0.708073 -0.0893639 0.430328 0.908237 0.879889 -1.3128300
## 6 0.5782380 -1.207890 0.8823800 -0.102105 1.834670 1.117060 -0.7175880
## PC303 PC304 PC305 PC306 PC307 PC308 PC309
## 1 -0.1808410 -0.0681759 0.857804 0.903926 0.147160 -0.205308 -0.6859770
## 2 2.6869200 0.8234650 -0.634279 -0.204301 0.413023 0.684150 -0.7299410
## 3 -1.5163100 -0.6338990 -0.109502 -0.826637 -0.450889 0.641502 0.6849520
## 4 -0.0282513 -0.9098190 1.934460 0.766758 -1.479430 -0.584326 -0.7959710
## 5 0.7944070 -0.7262600 -0.943115 -1.985560 0.300072 -1.724090 0.0472941
## 6 0.7691120 2.1418600 -0.130876 -1.041520 0.215288 -1.160540 0.4151860
## PC310 PC311 PC312 PC313 PC314 PC315 PC316
## 1 -0.571153 -1.214920 -1.229820 -0.739570 -0.612735 -0.3053200 0.2118910
## 2 -0.394852 1.681880 -1.950200 0.699761 -0.967610 1.7095700 -0.0358667
## 3 -0.625501 -0.696387 -2.302250 0.224089 0.215718 -0.5012200 -1.0750800
## 4 0.374613 -2.571250 -0.825077 -0.727066 -0.606653 -1.7053100 -2.0381000
## 5 -0.173505 -0.443516 -0.093964 -0.639347 0.511583 0.4607520 -0.3114920
## 6 0.134549 0.454952 -0.129294 0.774373 -1.011360 -0.0309106 -0.1467060
## PC317 PC318 PC319 PC320 PC321 PC322 PC323
## 1 -1.2652000 -0.00506833 1.342590 0.8060470 -0.132721 -1.334280 -0.5696410
## 2 -0.4431830 0.71761600 0.373968 -0.3651000 1.493980 1.019030 0.1125780
## 3 0.9750040 -0.14353200 -0.372653 -0.3065110 1.438280 -0.585709 -0.0957389
## 4 -0.4231130 0.23121100 0.394054 0.4501970 1.270710 -0.501647 -0.2829260
## 5 -0.0134821 -0.05963560 0.595172 -0.6823740 0.412990 0.442514 0.1327450
## 6 -0.6708910 0.11538300 -1.251810 -0.0822363 -1.569170 -0.440656 1.0226600
## PC324 PC325 PC326 PC327 PC328 PC329 PC330
## 1 0.438355 0.0776800 0.0495142 -0.0506694 -0.0149687 -0.0430816 0.1677090
## 2 0.282173 0.2536410 0.4902050 -0.3234080 -0.1826820 -0.3846510 -0.1355330
## 3 0.524582 0.1188800 0.0529997 0.2741200 -0.3504540 -0.1118490 -0.2768630
## 4 -0.380963 0.4693500 -0.0589738 0.5795380 0.3944940 -0.0561675 0.2645650
## 5 0.618017 -0.2050030 -0.2749630 0.0557564 -0.1506440 -0.1818220 0.3267600
## 6 -0.152357 -0.0620448 0.1298630 -0.3807810 -0.1834630 0.1412770 0.0468199
## PC331 Individual Pop_City Location Latitude Longitude
## 1 1.57436e-06 282 Barcelona Barcelona, Spain 41.3851 2.1734
## 2 1.57436e-06 283 Barcelona Barcelona, Spain 41.3851 2.1734
## 3 1.57436e-06 280 Barcelona Barcelona, Spain 41.3851 2.1734
## 4 1.57436e-06 279 Barcelona Barcelona, Spain 41.3851 2.1734
## 5 1.57436e-06 286 Barcelona Barcelona, Spain 41.3851 2.1734
## 6 1.57436e-06 287 Barcelona Barcelona, Spain 41.3851 2.1734
## Continent Year Region Subregion order order2 orderold
## 1 Europe 2018 Southern Europe West Europe 16 8 8
## 2 Europe 2018 Southern Europe West Europe 16 8 8
## 3 Europe 2018 Southern Europe West Europe 16 8 8
## 4 Europe 2018 Southern Europe West Europe 16 8 8
## 5 Europe 2018 Southern Europe West Europe 16 8 8
## 6 Europe 2018 Southern Europe West Europe 16 8 8
genotype <- here(
"euro_global/output/neuroadmixture/native_turkey_iberia2.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 301
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 301
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BAR BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM MAT
## 12 12 12 12 2 12 4 7 12 11 4 2 6 12 11 6 4 4 9 12
## OKI POL POP QNC SON SPM SPS SSK SUF SUU TAI TUA TUH UTS YUN
## 12 2 12 11 3 5 8 12 6 6 7 9 12 12 9
Convert format
##
## - number of detected individuals: 292
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.geno"
##
## - number of detected individuals: 292
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.removed file, for more informations.
##
##
## - number of detected individuals: 292
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.lfmm"
PCA for MAF 1% r2<0.01 snp set of native_turkey_iberia2
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 292
## -L (number of loci) 22537
## -K (number of principal components) 292
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: native_turkey_iberia2.pca/
## input file: native_turkey_iberia2.lfmm
## eigenvalue file: native_turkey_iberia2.eigenvalues
## eigenvector file: native_turkey_iberia2.eigenvectors
## standard deviation file: native_turkey_iberia2.sdev
## projection file: native_turkey_iberia2.projections
## pcaProject file: native_turkey_iberia2.pcaProject
## number of individuals: 292
## number of loci: 22537
## number of principal components: 292
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 292
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia2.pca/native_turkey_iberia2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation Year
## 1 Penafiel Penafiel, Portugal 41.18555 -8.329371 Europe POP 2017
## 2 Loule Loule, Portugal 37.09084 -8.092465 Europe POL 2017
## 3 San Roque San Roque, Spain 36.17042 -5.371530 Europe SPS 2017
## 4 Barcelona Barcelona, Spain 41.38510 2.173400 Europe BAR 2018
## 5 Magaluf Magaluf, Spain 39.50679 2.530729 Europe SPM 2017
## 6 Aliaga Aliaga, Turkey 38.76390 26.944800 Europe TUA 2019
## Region Subregion order order2 orderold
## 1 Southern Europe West Europe 11 3 3
## 2 Southern Europe West Europe 12 4 4
## 3 Southern Europe West Europe 14 6 6
## 4 Southern Europe West Europe 16 8 8
## 5 Southern Europe West Europe 17 9 9
## 6 Eastern Europe East Europe 40 32 32
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 35 Levels: BAR BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN ... YUN
## [1] 35
Check the regions
## [1] "Southern Europe" "Eastern Europe" "East Asia" "South Asia"
## [5] "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BAR 32.1490 -3.57024e-05 -25.5612 7.45879 28.0198 -23.5221 -7.14769
## 2 BAR 29.7010 8.26655e-01 -29.7475 6.83475 32.8377 -22.6346 -6.74459
## 3 BAR 31.8548 1.57682e+00 -24.0926 6.63071 28.6902 -10.3283 -5.37942
## 4 BAR 30.6260 -1.78255e+00 -21.3461 7.65705 24.6081 -14.7701 -5.28804
## 5 BAR 32.7104 3.41229e-01 -27.6710 8.00618 31.9859 -18.7968 -5.40058
## 6 BAR 31.2687 -2.77012e-01 -24.7553 6.53760 27.1167 -18.1478 -5.27905
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 -2.833060 2.94707 -6.67679 0.580087 6.75390 5.13864 0.642729 -7.98961
## 2 -5.341140 1.51015 -10.79810 1.184940 9.68668 2.47137 -3.023580 -10.83150
## 3 -0.786157 3.20566 -5.48401 4.546320 3.12622 2.25593 -2.696430 -4.70747
## 4 0.267839 1.94401 -7.51096 -0.489323 7.30012 1.47254 -0.811141 -7.49421
## 5 -5.396440 3.35203 -7.14974 3.565780 4.36574 5.12720 -6.290740 -6.65929
## 6 -4.958520 3.99904 -6.52298 0.101405 4.99329 3.55949 -6.392520 -7.75628
## PC16 PC17 PC18 PC19 PC20 PC21 PC22
## 1 11.70470 -10.20200 -4.88538 7.837980 -9.9147700 -0.479633 -6.28504
## 2 6.93279 -7.65994 -1.66107 -1.347090 -5.1788200 -0.305122 -14.25280
## 3 23.43990 -19.31760 -3.02697 -0.418472 2.3626600 0.958394 2.51428
## 4 14.93080 -8.47872 -3.09886 8.076280 -7.0295800 1.290610 -2.30400
## 5 9.19702 -11.17340 -3.94096 -2.087780 -0.0487011 2.746560 8.28849
## 6 -6.87185 3.23277 5.34675 -8.543800 1.2756200 -0.728523 -1.83813
## PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 3.20330 2.141370 0.268091 3.14516 -1.39835 -2.59199 0.165947 0.138815
## 2 9.57207 2.445640 1.454880 4.90913 -11.01050 -1.14730 1.468560 16.808900
## 3 -1.93554 0.278962 -1.822940 2.83876 4.27482 1.89196 2.125540 -7.931290
## 4 -0.90770 -1.265050 1.081640 -4.95508 6.84403 -3.57526 3.163940 -3.457080
## 5 -12.78510 -0.956789 -2.225350 -5.53712 2.69595 -3.06201 2.974850 -0.710658
## 6 1.63721 3.185860 3.173470 8.87940 -3.41808 4.38339 0.370649 1.754760
## PC31 PC32 PC33 PC34 PC35 PC36 PC37
## 1 -2.49102 -15.19890 5.559990 7.70411 6.96136 -0.454729 2.3142500
## 2 -1.50218 -13.66460 8.896240 22.43480 12.56900 -1.421720 0.0656101
## 3 -8.84277 12.42150 0.588983 -25.72340 -12.06360 -5.559240 5.0160800
## 4 4.34493 1.24909 -1.646820 -14.65900 -1.61074 5.818420 18.9872000
## 5 -12.27880 9.48178 -6.799240 -2.05809 -3.65177 -2.650200 4.1611900
## 6 -4.45135 6.57529 5.896930 -4.39076 -1.92407 -20.792100 -8.3009400
## PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 1 -6.5797300 -6.63206 -5.77841 -2.764440 4.63476 9.976810 -3.63684
## 2 0.0569845 -9.14024 11.38230 -4.742730 4.03482 0.127265 11.23750
## 3 6.1940000 9.08458 -1.00197 1.382480 -9.60538 -3.977130 -12.59740
## 4 11.5738000 -8.43252 2.79942 13.646200 -11.91980 1.882400 3.78714
## 5 -5.2823200 9.87206 -4.78070 -0.333928 -1.30264 -1.728110 -18.69690
## 6 2.2247800 6.09109 10.58240 -8.098160 3.31380 -6.138280 -8.47033
## PC45 PC46 PC47 PC48 PC49 PC50 PC51
## 1 1.24104 -1.79502 2.02788 0.202494 2.052020 0.583413 -17.27530
## 2 -2.77518 -4.82345 2.02721 6.254440 0.711571 1.389390 11.27720
## 3 3.60260 2.41759 2.71815 -6.492660 -1.199350 0.137072 3.73542
## 4 28.90900 3.53925 12.07280 12.783400 1.958420 -2.262660 15.48320
## 5 -3.16160 1.49227 -1.70442 -12.495000 -4.212900 6.230010 -17.26090
## 6 -10.56160 6.20400 -5.53827 -2.131910 -3.502590 4.657760 15.36510
## PC52 PC53 PC54 PC55 PC56 PC57 PC58
## 1 4.183290 -10.11660 -1.852700 -5.927280 -4.842320 1.14485 -6.60310
## 2 -0.434797 0.56894 -2.396640 3.629040 1.150830 -7.23607 2.01905
## 3 3.762090 8.53289 -5.933400 2.753020 -2.475180 4.68986 -4.34825
## 4 7.964550 -2.18586 -13.702500 -0.908266 4.209110 -1.89241 7.83188
## 5 -6.598620 9.26582 -0.395142 4.532910 -0.782116 16.06220 -8.84062
## 6 1.858520 18.80760 4.187990 9.145780 -8.813420 11.45840 2.93130
## PC59 PC60 PC61 PC62 PC63 PC64 PC65
## 1 1.428660 -0.224658 4.50809 -12.89220 6.966180 -3.4251400 -2.5965000
## 2 5.127400 7.047190 6.23085 10.22110 4.210590 -1.8488900 3.4306000
## 3 -3.699910 6.228740 -0.79353 -2.49357 -5.317330 0.0670438 -7.9249600
## 4 -0.946048 -8.536180 -1.21879 -9.62624 -8.943100 -5.6570100 -2.9408100
## 5 -3.218550 11.911700 -1.49187 2.03924 5.121750 4.2073500 -0.9615950
## 6 -8.914410 21.347100 1.64316 6.79338 -0.613567 0.7114540 -0.0850202
## PC66 PC67 PC68 PC69 PC70 PC71 PC72
## 1 -1.394440 2.5350300 -0.984569 -2.71401 3.76750 -2.391040 0.528144
## 2 -0.249666 -2.2332100 -0.951251 8.16241 7.01649 8.667030 -1.819340
## 3 1.730570 -1.2261300 -14.775400 -11.76170 -4.56802 5.436410 -2.579790
## 4 -4.234570 0.0494313 3.120380 -15.91000 1.10756 -0.720438 -5.229170
## 5 -5.575250 -6.3473100 -12.894600 3.96418 7.22063 10.553000 6.878650
## 6 -2.059380 6.0829700 -17.714300 -3.29783 5.48122 6.393830 7.033050
## PC73 PC74 PC75 PC76 PC77 PC78 PC79
## 1 9.170830 1.132140 5.41162 -1.01724 5.23653 -5.587490 -2.728420
## 2 -10.695100 -4.196060 -1.74054 1.03567 -5.94560 -2.838010 -6.004180
## 3 4.919590 -2.675190 -10.59510 -4.50283 1.95748 10.682800 7.429000
## 4 7.550350 -4.879980 -4.83926 -6.04896 4.72036 -1.833050 -1.261300
## 5 -0.259543 -1.715190 -3.29362 -4.68912 -3.67337 0.723783 0.798548
## 6 -3.543140 0.728822 -9.60782 -1.91837 5.43289 11.810700 3.962350
## PC80 PC81 PC82 PC83 PC84 PC85 PC86
## 1 7.51738 3.02058 2.110330 11.872000 0.177072 5.318140 -1.21857
## 2 -4.00588 -4.13504 2.248930 -12.196000 -7.105820 1.513840 -1.19066
## 3 -11.21860 -7.06428 -6.747470 3.836470 -11.728100 -0.962982 -1.17061
## 4 3.05410 3.08371 -6.788600 4.166460 2.937640 -4.680980 2.79978
## 5 -7.89223 -6.13900 -0.678201 -2.656480 -8.182620 1.449200 -2.73650
## 6 -9.60430 -3.84158 0.466986 0.752945 -3.672560 1.632090 -2.20957
## PC87 PC88 PC89 PC90 PC91 PC92 PC93 PC94
## 1 0.0157472 -4.909870 3.99136 -2.85633 9.271030 -0.923105 -6.57579 -1.49670
## 2 3.5776300 -0.999569 2.30794 1.76383 -1.017980 0.807255 6.24050 -9.88014
## 3 -5.6129500 -3.190530 1.75275 -2.39217 -1.213020 -6.918610 -4.30439 9.63454
## 4 -6.8574200 -4.303170 2.55183 1.03767 -9.147880 -9.851680 -9.94547 5.88331
## 5 2.8125600 -5.057380 1.35060 -6.82979 -0.999461 -2.151130 3.71211 3.76508
## 6 10.2339000 -5.916110 8.74460 8.40468 -0.140939 -2.441770 6.07298 1.14909
## PC95 PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 -1.705970 -4.774080 3.89459 10.406200 -8.220620 2.35138 -5.480480 5.26092
## 2 -6.884480 -1.700440 -2.71008 2.630500 9.395200 -2.98169 6.741940 -1.67474
## 3 -2.656010 -0.190924 -1.81154 0.625412 -0.178168 3.36716 -4.078920 2.52581
## 4 2.205440 6.076500 3.00509 -5.880210 1.282360 -3.78624 0.641194 -3.76860
## 5 -5.484760 -5.676200 -3.67940 11.792000 -0.227397 -2.07177 -3.399660 5.28666
## 6 -0.873207 3.135320 2.43684 12.161800 -6.445410 -2.11264 1.008960 3.29930
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 -3.023300 3.813990 7.93639 0.902842 7.41100 -0.2004300 3.67039 -4.85523
## 2 8.847310 -5.621070 -2.89782 6.028220 -8.71039 -4.3522200 -4.48840 -2.70624
## 3 -5.123530 5.568530 -1.57448 -7.204430 1.60002 -3.8318300 5.63741 2.17192
## 4 0.892564 3.807400 -8.26336 -3.123740 5.00888 4.2662400 5.76049 -3.50233
## 5 -0.746933 3.118180 -3.21939 -5.926790 -3.34355 2.6331700 -3.60086 -1.42533
## 6 3.570980 0.436257 5.13895 -5.720270 1.40391 0.0467343 -4.64397 2.15655
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 2.522410 -11.42840 -2.584290 -2.659490 -5.204680 -8.146140 -11.65680
## 2 -2.035720 7.92575 -2.149800 -4.723350 1.163330 0.377635 6.62557
## 3 -3.940020 8.59267 3.408660 0.321313 -2.424040 6.045010 1.13054
## 4 1.557970 1.61051 -0.993359 1.721980 3.255830 3.871180 -6.47478
## 5 -0.166056 1.25854 2.024940 -4.074460 1.265390 2.840870 4.38115
## 6 5.345260 7.13038 6.443210 -2.166000 -0.616138 0.575602 -5.05057
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## 1 -2.444820 -0.0876937 -2.534290 3.282190 3.54598 -3.033000 0.446108
## 2 4.743430 -1.6208700 -0.420141 -3.080520 -2.12107 -2.867780 -1.178180
## 3 0.293379 9.8130200 5.764840 4.349400 -2.43091 1.888440 3.065260
## 4 -4.449650 1.9980900 -3.695730 1.491300 -1.01010 -0.254435 9.304650
## 5 1.693280 2.4008400 4.028670 0.599541 -2.61832 -2.957640 -1.233740
## 6 4.365700 3.6950100 -2.044930 2.396000 1.55483 2.945390 -3.625050
## PC125 PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 1 -3.34879 -0.838937 3.108210 -2.070410 -4.60976 -2.649300 -0.799363 -5.62859
## 2 -2.28663 6.339860 -4.635130 0.670587 1.13643 3.782980 0.656040 5.42355
## 3 -1.32975 -0.110951 -5.971700 -1.143310 -3.48084 -0.134847 1.817120 -6.33430
## 4 7.30425 9.904460 -0.946319 1.385330 6.91505 -3.828870 -1.561360 3.72315
## 5 -5.81024 -3.087670 -3.632620 1.676840 4.64251 -4.749220 0.369647 -1.95511
## 6 6.59379 2.627040 8.158240 2.199520 1.16396 -0.636840 -4.195020 3.73059
## PC133 PC134 PC135 PC136 PC137 PC138 PC139
## 1 -3.98745 -5.58850 -2.916700 -4.850870 -5.0250300 2.713950 1.410660
## 2 1.14994 -1.35394 -3.837110 1.299120 1.4566400 -4.204280 -3.930120
## 3 -5.72722 2.47950 2.389970 -2.076530 2.6929300 -1.373520 -0.113003
## 4 6.01717 2.30139 4.156230 0.535404 1.2864400 -5.960520 -0.102168
## 5 6.94073 -1.74643 -3.264780 -7.717880 -4.0906700 0.633978 0.627256
## 6 2.99438 -5.72905 -0.031382 3.845800 -0.0921848 -7.688700 0.749744
## PC140 PC141 PC142 PC143 PC144 PC145 PC146
## 1 8.986360 2.970330 5.738310 -0.396681 -3.24753 3.486680 1.639590
## 2 -4.971700 -8.798420 -1.925670 -0.792272 -1.48837 -4.585590 -2.997830
## 3 -0.237661 0.160804 -0.968494 2.420390 4.50809 3.460580 1.402210
## 4 -6.319200 -0.945139 5.790410 3.402740 5.93614 1.375240 0.144302
## 5 4.835890 2.058880 -5.702570 -3.560350 3.44048 2.463880 3.648460
## 6 2.766190 -0.248406 -1.618830 0.625417 -2.02644 0.687294 -1.077740
## PC147 PC148 PC149 PC150 PC151 PC152 PC153 PC154
## 1 9.166460 9.42286 3.453520 6.036000 2.290240 -6.30507 8.949280 -2.819570
## 2 2.255950 2.42000 2.299810 -0.969596 -2.713220 6.24072 3.331990 8.921950
## 3 -5.661140 -8.68346 3.446820 5.278620 -0.748538 -3.56643 -5.487790 -4.618480
## 4 -4.091830 3.99384 -3.132580 1.630490 13.329000 5.85011 8.638840 -0.333806
## 5 1.918840 -0.40300 -3.815660 5.580700 -2.167700 3.27036 -1.819410 3.136120
## 6 -0.318047 5.18932 0.714816 5.114930 4.762080 -6.80116 -0.916525 8.191540
## PC155 PC156 PC157 PC158 PC159 PC160 PC161
## 1 4.331610 -12.618600 -7.389210 -6.0977800 0.493374 2.483150 -0.374770
## 2 -1.324990 4.929360 -0.236756 7.6325300 0.111205 -4.727440 -4.529530
## 3 -0.827121 2.600120 2.125740 2.8611100 -3.454370 -4.182660 0.476025
## 4 0.921155 0.225701 -3.727740 0.3532320 7.329480 6.054260 8.548000
## 5 -3.402070 -2.504550 2.723930 0.0601978 -1.111300 0.767375 0.506037
## 6 -1.525330 -9.115360 0.386766 -4.8976300 0.153708 5.832630 3.308670
## PC162 PC163 PC164 PC165 PC166 PC167 PC168 PC169
## 1 3.371260 3.05525 -1.77405 4.508040 -8.736710 9.760500 -5.64812 -1.89308
## 2 -1.112350 -1.01702 -2.92876 -1.105570 0.330393 -2.064830 5.83701 2.77659
## 3 5.770140 3.95876 3.66836 0.675254 5.562040 5.450400 -7.89879 -6.41106
## 4 -3.113610 -5.07163 5.18427 -4.616600 -3.064280 -12.380500 -2.63779 3.02937
## 5 -0.368924 2.05014 -3.53167 1.612200 -4.567550 4.571290 0.67456 9.37038
## 6 -8.006940 4.13406 -5.91255 3.384550 -3.937380 -0.270813 4.85521 -1.02440
## PC170 PC171 PC172 PC173 PC174 PC175 PC176
## 1 4.724860 2.80760 3.66319 -1.81166 -1.667590 -9.1643200 9.210010
## 2 0.671167 5.20303 1.64979 -3.81205 -0.545418 -0.1864850 10.373900
## 3 -1.125440 -2.49469 0.19660 6.17104 3.560690 -0.0123705 4.581950
## 4 4.574220 1.55755 1.77774 -1.86139 -3.834350 -4.9196000 -5.406110
## 5 -1.344380 -4.37764 1.53241 7.60831 2.868630 4.5294100 -3.267020
## 6 -1.653570 1.81293 -5.38385 2.12732 -4.363780 -1.8879800 -0.821741
## PC177 PC178 PC179 PC180 PC181 PC182 PC183
## 1 -0.808282 -2.600050 3.49627 -1.4633200 -0.0162058 -5.668270 -5.90813
## 2 -10.472800 7.652740 4.45539 3.7145200 -8.8267600 10.582600 2.09555
## 3 5.429350 0.900173 5.78224 5.9533500 -0.1632180 -5.070990 3.13627
## 4 -1.039870 7.838280 0.95028 -6.7603700 -2.4469700 0.158066 14.98770
## 5 2.239460 0.722735 1.32263 3.8853400 1.9179400 3.342270 5.12427
## 6 2.526650 0.142561 2.56224 0.0895991 -1.9851200 -5.257390 1.23091
## PC184 PC185 PC186 PC187 PC188 PC189 PC190
## 1 -1.088520 -4.75782 -6.192400 -7.359260 -9.826790 -0.388289 0.8833800
## 2 0.530547 2.07018 -0.294755 0.491013 -0.654046 7.179830 -1.3920400
## 3 2.324290 -2.10162 -4.341250 -2.911020 -2.507120 -11.000900 -0.0583295
## 4 0.637214 8.80171 0.781150 10.688300 1.175290 2.059360 -12.2454000
## 5 -6.039930 2.77807 0.322562 6.442530 3.073060 -0.794452 -3.7219700
## 6 1.404400 -2.80189 1.506750 -2.786420 -6.497590 -7.722230 -2.0715100
## PC191 PC192 PC193 PC194 PC195 PC196 PC197 PC198
## 1 -2.762770 -2.959770 1.660910 -2.79958 11.27050 -10.614700 1.33377 16.66170
## 2 -3.587680 0.683926 2.611860 -4.48210 -10.00910 -0.122655 -1.49524 -6.43537
## 3 -1.382200 -3.330270 2.211450 -2.12218 -3.24565 0.817553 -1.02774 13.17060
## 4 -1.659820 13.497100 -4.905160 -6.08711 5.08899 10.839900 3.52518 -4.48595
## 5 1.081110 -9.507780 2.682270 1.91901 6.77421 1.864110 -6.04193 -8.03805
## 6 -0.822098 13.106200 -0.921539 -8.00720 6.55654 5.630560 5.44407 -1.57349
## PC199 PC200 PC201 PC202 PC203 PC204 PC205 PC206
## 1 3.07933 4.9476800 5.079740 2.75030 -2.62805 -3.841330 2.16287 1.99476
## 2 -1.59744 11.0072000 -4.562800 -12.24880 -3.11288 4.940580 -14.96280 6.11133
## 3 -5.65437 0.0833825 0.930584 4.12751 -1.13538 -0.860866 1.83321 1.75967
## 4 9.57010 -0.8194510 -2.497450 -2.22290 5.76984 0.033245 -1.44901 10.06420
## 5 -4.87455 0.8831040 -2.155150 -7.54596 -4.93448 -1.012900 -6.68173 4.00790
## 6 7.03820 0.1863420 -1.690480 6.09464 5.80551 -2.320740 5.77096 6.02818
## PC207 PC208 PC209 PC210 PC211 PC212 PC213
## 1 -4.346700 -3.289450 6.25447 9.59359 11.39850 -10.664800 5.34098
## 2 -17.936600 -5.766270 17.09600 -8.23135 -12.39320 -14.725900 6.39669
## 3 5.970320 16.712200 -4.46878 -11.89830 -18.75880 0.183248 -3.94048
## 4 0.984328 -10.354400 2.88181 4.32485 9.14880 12.801800 1.43463
## 5 -1.279120 -21.589400 1.26366 8.92264 8.59670 -3.124050 -5.09427
## 6 2.254300 0.491288 8.94043 6.55083 8.92357 13.538100 10.99700
## PC214 PC215 PC216 PC217 PC218 PC219 PC220
## 1 2.03335 -4.89820 17.156000 0.569129 -8.54037 -4.27732 -7.58659
## 2 8.98167 1.12067 8.740180 -6.737930 9.81672 11.03830 11.39990
## 3 -13.35700 -10.16990 10.742800 7.821910 2.88008 8.07801 10.57460
## 4 3.72525 -2.91145 0.975213 -0.411577 -4.31412 6.24749 9.76544
## 5 8.99626 1.33743 -15.309300 -6.723800 2.39594 -5.56380 -7.92364
## 6 3.75290 8.41336 0.335052 -13.382600 -1.72089 -1.42374 -11.05330
## PC221 PC222 PC223 PC224 PC225 PC226 PC227
## 1 -9.71545000 -15.97020 1.57895 -10.059800 -5.54120 -4.42842 -7.280220
## 2 -10.26930000 4.02669 -6.23260 3.703880 -1.71717 2.41911 -0.722488
## 3 -5.67233000 4.59724 -7.69099 -0.731153 -6.17550 8.91809 -12.116000
## 4 0.84976400 6.04488 4.11756 5.144400 -0.51155 1.21722 1.729120
## 5 -0.00341819 -2.43077 5.94141 6.384400 2.35078 9.68112 -15.516400
## 6 2.63042000 2.41889 6.96757 -7.781280 3.84133 -6.94825 15.439900
## PC228 PC229 PC230 PC231 PC232 PC233 PC234 PC235
## 1 1.56653 1.9128200 6.663760 1.80641 3.21324 -7.058290 13.82720 1.490730
## 2 5.01804 1.4214500 4.968850 0.39041 4.73389 -0.209115 5.45948 -8.627970
## 3 7.24334 8.8614200 5.350230 -4.39558 10.22460 -9.296230 3.28080 -4.472500
## 4 6.97765 -0.9838880 6.531200 2.71362 2.40154 2.979480 -5.31963 0.778473
## 5 11.28150 0.0489831 -0.486633 -4.51210 -7.80903 12.007100 5.41963 3.159060
## 6 -7.67813 -11.2558000 -5.792450 -1.12982 -3.95942 -1.022840 -6.12596 -2.303430
## PC236 PC237 PC238 PC239 PC240 PC241 PC242
## 1 -2.45632 5.9709600 -1.92098 -1.27551 7.802630 2.615760 -1.23867
## 2 0.55105 -0.0382294 5.18558 4.81965 2.566010 0.935648 3.58327
## 3 4.30844 3.5837200 6.88010 10.31570 -0.215351 -10.027800 8.68316
## 4 -2.55565 -0.7882580 -2.53036 -12.35090 -1.156690 12.280900 -4.18891
## 5 -6.91222 3.5995900 3.16979 -3.76331 -15.541100 14.235900 -1.60570
## 6 2.47666 -8.3607100 -4.51232 3.00459 15.746700 -13.219700 -2.08820
## PC243 PC244 PC245 PC246 PC247 PC248 PC249 PC250
## 1 -2.74303 8.72729 3.841800 -13.477500 -6.394540 0.939012 -3.35675 0.301498
## 2 -1.18148 4.51859 5.689910 -0.543535 2.295940 4.706180 6.13716 -0.511929
## 3 -8.33822 5.63916 0.444548 2.968990 3.570350 4.390560 2.62112 -0.794626
## 4 4.97187 4.88890 4.143980 -8.726400 -1.017430 -4.743460 5.66040 2.951600
## 5 13.23640 -4.54562 -2.767880 12.071600 -0.395225 -2.381310 -7.41396 -1.400670
## 6 -6.61671 -1.43764 1.184490 0.489956 -0.692523 -2.291920 6.60338 1.607200
## PC251 PC252 PC253 PC254 PC255 PC256 PC257
## 1 -0.419299 -0.0641331 10.0366000 -3.687180 0.986868 2.00737 0.846952
## 2 2.355360 4.1964400 0.1148790 -2.663940 -3.998730 1.22131 -4.127310
## 3 0.488164 4.6687700 -2.6827800 2.383900 -2.245530 2.21993 -2.576030
## 4 4.683660 -1.3397700 2.9772200 -3.330460 0.547032 1.98340 0.068591
## 5 -3.516040 -8.2333800 0.0749863 0.315826 1.616660 2.00374 2.052890
## 6 2.239730 2.1253100 -6.2841100 0.981943 2.011350 -4.47946 -3.964310
## PC258 PC259 PC260 PC261 PC262 PC263 PC264
## 1 -0.7440720 0.288347 -1.179400 2.080990 1.604400 0.0755411 0.478741
## 2 0.0734875 -0.388623 -0.895096 -3.151140 1.084370 1.5254900 0.873749
## 3 -0.1464840 -4.073660 1.334820 -0.116169 -1.072870 1.4481000 -1.129210
## 4 0.0885669 1.687960 -1.387930 -1.708650 -0.411361 -1.1811700 0.386918
## 5 2.5220600 -4.583370 2.257550 -3.054520 3.266260 -0.7744740 -0.752204
## 6 0.6285420 1.638860 -0.995377 -0.716599 -2.626100 0.9657840 0.594791
## PC265 PC266 PC267 PC268 PC269 PC270 PC271
## 1 0.421656 -0.628479 -1.629130 -1.499560 1.8114200 -2.368890 0.5769160
## 2 1.040240 0.360824 2.533680 2.011430 -0.3900610 0.689523 -1.8109500
## 3 -1.806820 -0.229284 3.211490 1.677790 -0.0630426 0.751667 -0.9790850
## 4 -0.427434 0.996801 -1.440390 -2.051640 0.2172810 0.203962 1.0798100
## 5 -1.667390 2.070440 -0.469447 -1.935430 2.0364700 0.719146 -0.2692470
## 6 1.288230 -0.504035 1.051390 0.928381 -2.0237600 -0.998783 -0.0467495
## PC272 PC273 PC274 PC275 PC276 PC277 PC278
## 1 1.109900 -0.6796440 0.4229600 -1.7638400 2.446870 -0.9651020 -1.2354000
## 2 -0.272658 0.1830280 -0.6587680 -1.0463900 -1.118450 0.7515890 -2.8140300
## 3 -0.678672 -0.0811709 -0.4936660 -0.4592600 -0.502171 -0.3357670 -0.4385010
## 4 2.401550 -1.5051200 0.6993520 -0.6823450 -0.285309 -0.5145040 -0.0204774
## 5 1.465060 -0.6795800 0.2306010 0.4262590 1.230140 0.0184329 -0.4976150
## 6 -1.289780 0.2974330 0.0672784 -0.0663638 0.914317 -0.2827370 0.0993676
## PC279 PC280 PC281 PC282 PC283 PC284 PC285
## 1 1.032090 1.47404000 -1.528150 -4.037510 0.32388100 -0.317983 -1.1915300
## 2 0.818536 0.77091300 -0.543492 -4.133750 0.00422665 0.422983 0.7635770
## 3 -1.210300 0.00167828 0.785943 -0.189621 -0.25221300 -0.277350 0.7455640
## 4 0.444233 1.35126000 0.380841 2.377150 0.09532000 -0.619482 -0.0575488
## 5 -0.377898 -1.08836000 -0.017085 1.589780 0.96122500 0.256280 0.8698030
## 6 -0.630993 1.27841000 1.361400 1.974320 -0.19138000 0.237632 -0.3190350
## PC286 PC287 PC288 PC289 PC290 PC291 PC292
## 1 0.300507 -0.1658100 0.4254090 0.4903590 -0.1762250 -1.95817e-01 0
## 2 -0.288328 0.3950560 0.0283805 0.0701378 0.2312970 1.44166e-01 0
## 3 0.127325 -0.0381009 0.1522850 0.1031790 0.1557410 1.08869e-01 0
## 4 -0.246705 0.1027270 0.1072570 -0.0732240 0.3829160 4.32772e-05 0
## 5 0.496852 -0.1101960 -0.3285820 -0.7412270 0.0165348 2.78505e-02 0
## 6 -0.956114 -0.1354700 -0.2202060 0.1508820 0.1766310 2.62704e-02 0
## Individual Pop_City Location Latitude Longitude Continent Year
## 1 283 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018
## 2 279 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018
## 3 282 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018
## 4 286 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018
## 5 287 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018
## 6 284 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018
## Region Subregion order order2 orderold
## 1 Southern Europe West Europe 16 8 8
## 2 Southern Europe West Europe 16 8 8
## 3 Southern Europe West Europe 16 8 8
## 4 Southern Europe West Europe 16 8 8
## 5 Southern Europe West Europe 16 8 8
## 6 Southern Europe West Europe 16 8 8
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_pc1_pc2_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC1 & PC3
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_pc1_pc3_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
For some reason I left out 2 of the Spanish pops in the above; use the dataset below (which also includes the USA) instead
Turkey + Iberia + US
echo "BER
PAL
POP
POL
SPB
SPS
SPC
BAR
SPM
TUA
TUH
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_turkey_iberia_US.txt
cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_turkey_iberia_US.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/native_turkey_iberia_US2 \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/native_turkey_iberia_US2.log
100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.969253. 22537 variants and 329 people pass filters and QC.
genotype <- here(
"euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 338
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 338
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BAR BEN BER CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM
## 12 12 12 12 12 2 12 4 7 12 11 4 2 6 12 11 6 4 4 9
## MAT OKI PAL POL POP QNC SON SPB SPC SPM SPS SSK SUF SUU TAI TUA TUH UTS YUN
## 12 12 11 2 12 11 3 8 6 5 8 12 6 6 7 9 12 12 9
Convert format
##
## - number of detected individuals: 329
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.geno"
##
## - number of detected individuals: 329
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
##
##
## - number of detected individuals: 329
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.lfmm"
PCA for MAF 1% r2<0.01 snp set of native_turkey_iberia2
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
##
## -n (number of individuals) 329
## -L (number of loci) 22537
## -K (number of principal components) 329
## -x (genotype file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.lfmm
## -a (eigenvalue file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.eigenvalues
## -e (eigenvector file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.eigenvectors
## -d (standard deviation file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.sdev
## -p (projection file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.projections
## -c data centered
## * pca class *
##
## project directory: /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/
## pca result directory: native_turkey_iberia_US2.pca/
## input file: native_turkey_iberia_US2.lfmm
## eigenvalue file: native_turkey_iberia_US2.eigenvalues
## eigenvector file: native_turkey_iberia_US2.eigenvectors
## standard deviation file: native_turkey_iberia_US2.sdev
## projection file: native_turkey_iberia_US2.projections
## pcaProject file: native_turkey_iberia_US2.pcaProject
## number of individuals: 329
## number of loci: 22537
## number of principal components: 329
## centered: TRUE
## scaled: FALSE
Test
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
##
## -n (number of eigenvalues) 329
## -i (input file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.eigenvalues
## -o (output file) /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.pca/native_turkey_iberia_US2.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "blue", cex = .8)
Get values
# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
sampling_loc <- read.csv(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey_US.csv"))
saveRDS(sampling_loc, here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey_US.rds"))
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "sampling_loc_native_iberia_turkey_US.rds"))
head(sampling_loc)
## Pop_City Location Latitude Longitude Continent Abbreviation
## 1 Berlin, NJ Berlin (NJ), USA 39.79081 -74.929100 Americas BER
## 2 Palm Beach Palm Beach (FL), USA 26.70560 -80.036400 Americas PAL
## 3 Penafiel Penafiel, Portugal 41.18555 -8.329371 Europe POP
## 4 Loule Loule, Portugal 37.09084 -8.092465 Europe POL
## 5 Badajoz Badajoz, Spain 38.86622 -6.974194 Europe SPB
## 6 San Roque San Roque, Spain 36.17042 -5.371530 Europe SPS
## Year Region Subregion order order2 orderold
## 1 2018 North America 1 NA 75
## 2 2018 North America 3 NA 77
## 3 2017 Southern Europe West Europe 11 3 3
## 4 2017 Southern Europe West Europe 12 4 4
## 5 2018 Southern Europe West Europe 13 5 5
## 6 2017 Southern Europe West Europe 14 6 6
Check pops
## [1] OKI OKI OKI OKI OKI OKI
## 39 Levels: BAR BEN BER CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG ... YUN
## [1] 39
Check the regions
## [1] "North America" "Southern Europe" "Eastern Europe" "East Asia"
## [5] "South Asia" "Southeast Asia"
Merge
merged_data <- merge(pc.coord, sampling_loc, by.x = "Population", by.y = "Abbreviation")
head(merged_data)
## Population PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 1 BAR 25.7129 -2.62185 28.9986 -5.51174 -28.5950 -4.85375 8.80835
## 2 BAR 25.9719 -4.37975 30.2435 -6.29308 -28.0503 -9.28326 20.98460
## 3 BAR 22.6584 -5.98900 31.2084 -6.17300 -30.1854 -8.67753 20.77940
## 4 BAR 23.3355 -3.95914 34.0338 -5.23732 -32.6703 -8.54093 20.55520
## 5 BAR 24.6073 -5.65383 25.9796 -6.70372 -24.6932 -6.85840 13.02770
## 6 BAR 26.0343 -4.48019 33.1304 -6.75906 -32.0280 -8.35747 16.63600
## PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
## 1 1.870870 1.734370 -4.91634 -5.53705 -3.56304 -2.94512 4.457260 -4.79310
## 2 1.474940 0.220050 -7.61993 -2.89329 -8.41748 -5.24904 0.814279 -5.94347
## 3 -1.932840 -1.092970 -4.85270 -8.35119 -7.82170 -5.77658 0.190452 -4.28722
## 4 -0.531471 -1.355440 -7.56444 -9.04959 -8.02850 -9.13727 0.506113 -4.30162
## 5 2.721540 1.018850 -2.60428 -4.39771 -7.28905 -6.78252 1.763620 -3.00040
## 6 -2.175300 0.747143 -6.44493 -4.93338 -6.69604 -3.22217 4.346380 -8.02626
## PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23
## 1 1.590870 5.39503 2.290690 9.44932 19.933400 -15.758100 11.18300 1.14165
## 2 -0.368734 4.33769 -4.907070 2.69515 10.715300 -2.582720 16.10230 5.94577
## 3 5.501230 6.05556 -3.303920 -3.16960 0.672816 2.631570 2.17795 4.66959
## 4 2.402530 7.61034 -0.886132 1.93028 6.566990 0.742576 10.85790 8.38076
## 5 0.393075 4.37686 -1.464760 2.51858 13.918800 -2.127500 12.61620 2.04683
## 6 5.193690 5.56186 1.268110 4.25996 7.034090 -8.806690 5.58722 5.85928
## PC24 PC25 PC26 PC27 PC28 PC29 PC30
## 1 0.333029 -0.122214 -0.3611860 -0.59082200 0.457773 1.87716 0.932496
## 2 1.410770 -4.356960 -1.5981200 -0.00533906 -8.634270 1.59730 2.394030
## 3 -0.108263 -0.805248 0.0503982 2.43193000 5.947910 4.08274 -2.081860
## 4 -5.096590 6.249210 -7.5627300 -5.53342000 -8.613630 -5.50024 1.394430
## 5 3.214470 -6.152820 -1.7519700 0.66419400 -3.034210 2.56470 -3.608680
## 6 2.108070 -1.624790 -0.3402630 4.89707000 11.758800 5.48238 0.971937
## PC31 PC32 PC33 PC34 PC35 PC36 PC37
## 1 0.6491640 -3.785210 -0.843485 -2.46992 10.71850 -1.998580 3.27762
## 2 -0.0736669 -0.482962 0.522689 4.43410 1.89056 7.354840 -1.98055
## 3 2.6615400 1.039170 0.727252 -3.15391 2.01250 -7.941030 3.76717
## 4 -0.6493140 -6.555910 5.050470 5.58842 -8.85389 -0.556657 -2.59914
## 5 2.1252000 4.771000 -3.755900 -1.84615 1.88672 -1.541800 -8.72959
## 6 -2.3446000 4.829720 0.339756 -1.15407 6.99774 0.138146 3.05512
## PC38 PC39 PC40 PC41 PC42 PC43 PC44
## 1 -13.296200 -8.42828 -3.509060 -23.379600 -6.42773 1.246000 4.111360
## 2 8.781640 9.11525 -1.841470 10.115300 12.48200 -0.592523 -0.672527
## 3 0.686314 -7.56950 1.042220 -3.505750 -1.65533 -2.831840 -1.518690
## 4 -3.414240 17.77690 10.326000 19.538900 13.86880 -4.557990 -8.983550
## 5 -9.756850 -10.41210 -9.030700 -8.702520 3.47650 3.723150 -1.278030
## 6 -5.096560 -1.93924 0.693932 -0.733361 -12.07670 4.917280 -0.077805
## PC45 PC46 PC47 PC48 PC49 PC50 PC51
## 1 -8.22693 7.26459 2.81229 -13.69750 0.00529661 7.22674 7.18839
## 2 3.38634 5.00751 -3.71344 2.27498 5.70275000 -5.13276 -6.63773
## 3 -14.49830 3.85632 -12.99480 -11.99780 -2.84140000 -6.46671 6.38107
## 4 3.07865 -6.04796 1.83729 14.83490 -5.04357000 -5.73618 -8.20044
## 5 4.45817 5.27687 19.34660 1.15268 -11.74550000 8.01450 1.28139
## 6 -4.10900 13.32710 -9.49633 -11.68910 -3.05790000 2.61201 2.57471
## PC52 PC53 PC54 PC55 PC56 PC57 PC58 PC59
## 1 -2.48475 -3.391080 9.13812 11.920900 -0.238951 -1.51282 1.87436 -1.01701
## 2 5.22769 -2.487550 5.47851 4.870490 0.584372 -2.86258 -6.79152 -5.49720
## 3 -1.42788 -0.025347 -4.05139 0.913042 1.998210 21.02590 -1.94426 -7.02490
## 4 2.82566 11.016500 -11.09660 -6.243970 -0.969577 -1.51034 1.29543 7.58641
## 5 9.87423 16.311000 25.72790 -4.130750 -3.567030 -8.54634 -2.06231 17.18530
## 6 -7.43502 -9.250040 3.42509 12.396700 0.165556 3.05928 -2.30151 -14.62470
## PC60 PC61 PC62 PC63 PC64 PC65 PC66
## 1 -2.23455 -0.177461 -7.37615 1.940790 10.91640 2.69161 6.945990
## 2 6.31023 -5.034510 11.50390 0.596963 -7.65995 1.95384 5.083740
## 3 14.60620 9.074690 -20.43490 2.095490 6.29937 -17.01570 -14.686000
## 4 -2.72874 0.546962 -1.29349 2.048900 -1.10257 4.74395 -3.385140
## 5 -10.08030 -6.365580 -3.59783 1.705990 2.52326 4.03709 0.651713
## 6 13.69920 4.198100 -2.36435 0.707916 3.81585 2.18127 -1.560010
## PC67 PC68 PC69 PC70 PC71 PC72 PC73
## 1 0.917349 7.35632 -1.57878 5.889800 -10.698400 2.62260 6.20659
## 2 0.197489 2.87269 2.20831 -5.609890 1.243510 -1.96598 -2.96545
## 3 10.067700 -2.97230 -5.42337 -0.881617 -0.155405 14.57200 -2.60278
## 4 -2.023540 -3.34201 5.79487 -3.921900 6.365240 2.93555 1.89292
## 5 -4.264750 4.21339 -7.45710 9.331820 -1.367420 -13.22440 1.68586
## 6 -0.949399 6.69744 -6.73716 -6.014000 -6.764330 19.51010 3.30330
## PC74 PC75 PC76 PC77 PC78 PC79 PC80 PC81
## 1 2.393500 -1.228940 1.65910 -2.49965 -0.395638 -6.365730 -3.90690 -6.40180
## 2 -0.889967 9.492890 8.07974 -7.77725 -2.983300 -0.546322 -5.86025 6.90206
## 3 -11.783100 -5.797900 -9.11690 2.16389 -1.813100 3.329050 13.50310 -2.92843
## 4 2.405830 -5.448380 -1.06254 1.01008 -2.424320 2.709030 14.50870 -3.00442
## 5 4.348830 9.638210 1.26453 12.57100 -0.381574 0.754405 -12.03030 2.78153
## 6 -7.999040 -0.687616 2.21934 -2.31997 -6.937260 -2.012540 8.46498 -8.61532
## PC82 PC83 PC84 PC85 PC86 PC87 PC88
## 1 7.634040 -8.58928 6.864730 5.214140 -13.132200 -6.822920 -1.89355
## 2 7.743140 -1.80619 -1.761400 4.625120 -0.432541 -2.978370 -1.18636
## 3 -0.914486 5.87550 0.891326 -6.076440 6.584080 12.120100 1.18209
## 4 -4.396260 -8.21684 -1.405600 -3.149420 1.306570 -0.226873 6.57654
## 5 -3.688690 -1.17806 -0.431416 0.654583 -9.213000 -9.637340 -6.24666
## 6 6.685710 -14.22560 3.122500 7.727820 -3.419080 -1.335180 4.31747
## PC89 PC90 PC91 PC92 PC93 PC94 PC95
## 1 -0.243170 12.631100 9.771980 7.22482 0.453895 -4.69968 -8.237440
## 2 -1.721830 -5.142270 2.703170 -15.97690 -4.544550 8.85826 1.801130
## 3 -0.781144 -2.744640 -8.071470 3.65437 3.360340 -3.39123 1.189590
## 4 2.235670 0.522126 -1.432080 7.00188 7.283740 -7.05028 2.663720
## 5 -1.591390 -3.129380 6.857100 4.46007 -0.876335 1.50089 -3.151160
## 6 0.308486 3.797690 -0.361274 1.70548 3.427190 -2.16816 0.803193
## PC96 PC97 PC98 PC99 PC100 PC101 PC102
## 1 -1.438810 -1.87967 -4.16469 0.455943 -2.1381200 -10.157700 2.930280
## 2 -0.211897 5.15472 -1.60486 -6.203590 -2.6882800 0.958768 -0.885385
## 3 3.623900 -1.79305 -1.07488 5.120680 -0.7640870 -2.019440 -1.978810
## 4 -2.889360 6.44299 -3.84847 4.838880 -0.0480383 0.572218 -4.688430
## 5 9.152500 -4.41583 6.59044 2.456890 2.3220800 -0.853881 -0.815616
## 6 -2.430890 6.96112 -3.40400 -0.208818 -0.7036810 -3.604280 -1.187880
## PC103 PC104 PC105 PC106 PC107 PC108 PC109 PC110
## 1 1.23902 -5.120930 -4.368840 1.037800 7.20698 3.68224 -0.368935 -1.15635
## 2 -3.39272 -1.157220 -0.758977 -0.745568 3.71898 -6.88873 7.221980 -2.64895
## 3 1.98004 0.716473 5.637400 -3.504650 -3.92182 3.46150 -7.183810 -5.62683
## 4 -1.52862 5.666670 -5.294130 2.326670 -5.19702 1.87295 4.297490 7.95955
## 5 1.62872 -8.420870 -5.656670 3.814460 -2.19808 -2.60299 0.521129 -5.61614
## 6 -7.22169 2.379020 -1.308250 -0.870971 5.86702 4.99827 0.935283 -3.22324
## PC111 PC112 PC113 PC114 PC115 PC116 PC117
## 1 0.600113 3.70751 -5.547490 2.866860 -1.676030 -2.86628 -2.4995400
## 2 1.268020 2.06241 -3.280630 -0.670925 0.352867 7.81723 -6.4582600
## 3 -5.493960 3.26006 2.601890 -6.114910 3.526690 3.56937 3.3057300
## 4 -8.377160 -7.17172 -0.978643 -4.673240 -2.612030 -3.42140 1.2679300
## 5 -0.379843 9.57801 2.224540 5.764800 -2.447190 -1.41866 0.0821863
## 6 -3.641380 -3.68478 -13.761700 -3.286960 2.376510 -2.51607 -1.2937700
## PC118 PC119 PC120 PC121 PC122 PC123 PC124
## 1 1.152380 1.22148 1.35734 -4.75114 -0.869155 -1.2783300 0.106456
## 2 -0.930811 -13.56500 10.66720 -3.95575 4.973730 -0.0271546 2.242130
## 3 -1.494830 4.91867 -6.50309 1.23996 3.410710 2.5321400 -5.066040
## 4 3.660750 4.03208 -4.78504 6.70646 -0.650758 -1.9886800 -1.732770
## 5 -3.553300 -2.80008 -3.72177 7.63769 0.821607 1.1064100 4.676510
## 6 3.677810 -3.98069 -1.46809 2.81461 -5.016200 -0.7803590 -2.897490
## PC125 PC126 PC127 PC128 PC129 PC130 PC131 PC132
## 1 3.14992 -6.801810 -3.449730 -2.921770 4.384100 -6.01140 8.71534 -8.418310
## 2 5.08365 2.089020 0.222601 -6.857800 -3.845480 4.69359 5.26967 3.309780
## 3 -1.46590 4.046790 4.163530 -1.177230 1.276930 2.56634 -9.04946 -1.268550
## 4 -11.20610 -1.634600 -1.271220 8.729580 1.645530 -4.47525 -7.73236 3.178270
## 5 10.61010 5.909370 4.285370 0.288425 4.797270 1.33226 10.08740 0.620095
## 6 -1.24259 -0.987467 1.959150 -1.316770 0.770285 -4.00533 2.21727 -4.599960
## PC133 PC134 PC135 PC136 PC137 PC138 PC139 PC140
## 1 1.247160 -3.6746600 6.08511 -5.12278 -3.44129 0.68073 0.884904 -4.72218
## 2 0.550375 2.0801000 -6.33822 6.22251 -4.86637 4.13430 -1.732350 5.29714
## 3 1.377060 -0.0354954 -5.76098 4.52578 1.33248 2.71518 -0.636026 -2.01015
## 4 -4.919670 -0.8878070 3.74182 3.33335 5.36889 -2.79551 3.634640 1.38568
## 5 7.066210 4.3528600 2.34308 -3.76563 -7.62249 -8.11352 2.224500 -2.35684
## 6 -1.291370 -6.2836800 4.32724 -1.94272 -2.85946 -4.05331 3.258220 -2.45105
## PC141 PC142 PC143 PC144 PC145 PC146 PC147
## 1 -1.444540 5.484490 -0.32021 -3.446660 -11.478100 1.359720 2.262930
## 2 1.657710 4.174880 1.08986 -10.475200 -0.206588 0.848232 0.693631
## 3 -0.854115 2.174070 -2.75772 -0.706346 4.781330 0.346067 -2.499990
## 4 -2.066620 -4.076950 1.22522 -0.198642 1.127730 -5.188380 -5.001110
## 5 -5.628000 -7.421560 6.22590 5.442630 0.855120 -2.511970 -1.218310
## 6 1.823440 -0.485569 -5.11459 1.353360 1.204220 5.280530 -2.129380
## PC148 PC149 PC150 PC151 PC152 PC153 PC154 PC155
## 1 -1.5995600 2.930680 1.25866 1.313810 3.953900 3.37841 -1.226190 -4.06738
## 2 0.0533152 -1.080660 2.98743 2.907660 -1.649560 5.18786 4.043370 1.24385
## 3 1.1897600 -2.096530 -2.58254 -3.632070 2.155250 -5.74233 -1.430910 4.59022
## 4 -3.4337100 -1.676370 3.10077 -0.548271 -4.798290 -1.05579 1.475150 6.12158
## 5 1.5200700 0.242815 -1.22193 -5.419560 0.755099 -2.93214 -6.324730 -1.97275
## 6 0.4135370 0.836087 7.43499 -0.662060 -1.328750 3.40554 0.793226 1.84796
## PC156 PC157 PC158 PC159 PC160 PC161 PC162 PC163
## 1 0.0200301 -0.198192 -4.09215 1.34319 5.45502 -5.16670 0.748815 -4.250780
## 2 -0.9412180 8.028920 4.53141 -2.14673 -5.54470 -3.44115 -0.306315 2.182890
## 3 -6.1530200 -6.446070 1.44059 -5.08515 -1.00983 -1.19545 -2.392580 1.128390
## 4 1.1947300 -2.754890 2.59877 5.17269 -4.48090 4.58743 -4.364330 -0.327329
## 5 6.7848300 1.190440 -5.43150 7.32264 -2.02349 -1.63856 -0.605595 3.293560
## 6 2.9913600 3.059010 3.59015 -11.93240 -3.18970 -1.92553 -0.481372 2.239820
## PC164 PC165 PC166 PC167 PC168 PC169 PC170 PC171
## 1 1.33749 1.21474 -6.598020 -0.795236 4.978780 0.560395 -3.30080 -4.185800
## 2 -5.11573 5.23798 2.314560 -6.867580 0.198715 -0.439490 8.39280 2.711710
## 3 -6.41499 -1.35580 -1.843520 4.107610 -1.846500 0.476608 6.01534 1.541340
## 4 2.89053 -2.85711 -1.651910 -0.805614 2.109260 -4.529850 -4.21240 -2.149130
## 5 6.69327 -7.24007 6.646710 -2.481420 -4.517500 -1.050480 -6.23273 4.969200
## 6 5.73663 3.07591 -0.642563 0.570129 0.363035 -4.123080 0.85659 0.543589
## PC172 PC173 PC174 PC175 PC176 PC177 PC178
## 1 0.717898 0.701147 -0.0744813 6.310380 0.673007 -6.672160 -0.547337
## 2 -0.238610 9.963270 -2.4448600 -16.645900 -5.058020 1.246670 -2.543140
## 3 -1.948520 1.676890 1.9756400 -0.651444 -0.388861 -4.898290 -5.976720
## 4 1.650750 -13.219700 -2.2461700 3.712520 -1.790180 -1.646410 3.921580
## 5 9.230910 3.536260 7.3502200 -3.973760 -4.863820 -0.490094 1.035400
## 6 -2.349120 -0.735585 -5.0606100 0.973047 1.675880 4.786290 -0.439128
## PC179 PC180 PC181 PC182 PC183 PC184 PC185 PC186
## 1 0.374826 -1.902990 -1.998040 -2.83480 -4.639700 -1.54106 -3.21807 3.63484
## 2 2.348450 -0.913489 3.398340 -0.22483 2.326280 4.18375 -3.63032 -7.29759
## 3 3.458210 2.874280 4.809760 1.29689 -0.611268 -1.61441 8.20865 5.30831
## 4 -1.150270 -3.200230 -1.130810 1.11435 -2.256870 -0.78954 -0.42869 8.50227
## 5 2.611880 6.006020 0.230703 4.81012 -3.384240 -1.40677 -4.23615 -3.91429
## 6 -0.911831 -2.733000 -2.226200 -2.12319 0.676775 3.86474 -2.42279 -8.56611
## PC187 PC188 PC189 PC190 PC191 PC192 PC193
## 1 -4.579180 -7.858830 0.335818 -4.92246 10.50470 2.906410 3.250850
## 2 -6.723570 -0.603431 -5.736950 -11.97710 2.50693 -3.615450 -1.908840
## 3 2.945630 1.479520 2.778580 1.53770 2.74395 4.408030 -4.887140
## 4 5.058180 1.477590 2.287680 2.16909 -15.67790 2.858210 1.051200
## 5 0.966594 -4.056130 1.024060 7.74491 -11.46850 -9.392760 -0.621817
## 6 1.316620 4.087070 8.082890 -3.17717 3.19539 0.125781 -4.479550
## PC194 PC195 PC196 PC197 PC198 PC199 PC200 PC201
## 1 -2.803420 0.940661 4.550060 -6.312980 -0.871334 2.97657 2.48236 0.176071
## 2 3.934200 3.359870 4.392700 -1.148540 -3.500150 8.54854 -5.29140 -0.345064
## 3 -0.479624 2.438460 -4.651430 4.272160 1.218910 -7.26198 -2.72198 -5.283790
## 4 4.348100 -3.088640 6.140210 -6.708430 6.038530 4.18459 -2.13864 14.368900
## 5 1.259300 -4.584890 8.022080 3.414910 -0.533752 2.07845 3.71373 -3.263110
## 6 -4.395400 -7.833460 0.521059 0.710471 -3.947820 -2.35297 1.21840 -0.378980
## PC202 PC203 PC204 PC205 PC206 PC207 PC208 PC209
## 1 -3.11112 -4.598450 -1.96079 3.52513 -1.31918 -6.670200 -1.34216 -3.612510
## 2 1.54712 2.344110 -2.94640 -3.13680 7.17757 -0.395078 -5.69215 8.461160
## 3 3.38690 5.952260 1.38994 -5.29183 -4.22535 9.260910 2.98283 -0.764994
## 4 5.31532 0.932129 -3.17659 -2.80713 2.56407 1.306090 2.79659 1.101850
## 5 -2.00416 -0.203443 4.38951 6.97284 -9.70525 -3.697460 6.39656 8.730730
## 6 2.50952 -1.602600 -2.30116 1.19615 -4.62051 -3.716480 6.20565 -5.826400
## PC210 PC211 PC212 PC213 PC214 PC215 PC216 PC217
## 1 -1.661940 -2.46879 6.710560 -2.8241400 7.22261 -3.770720 5.20565 3.42311
## 2 3.492260 -1.72476 10.872400 0.6363790 3.57742 -3.091950 1.20010 8.41381
## 3 -0.703084 2.14656 -6.903270 -0.0225667 -2.40430 3.660320 1.55056 -1.84901
## 4 -0.832008 3.17770 -2.990440 -4.8824500 4.54524 -0.998691 2.75592 -4.78552
## 5 -8.730790 7.31274 -0.552715 8.4335300 -1.56579 -0.817271 -12.71400 -1.70658
## 6 -8.740840 -5.74159 -1.670020 2.4412600 -3.45297 3.287710 6.58442 -2.43074
## PC218 PC219 PC220 PC221 PC222 PC223 PC224 PC225
## 1 -3.57521 -0.925743 -6.040000 5.841540 -4.217500 2.565470 1.25886 2.31512
## 2 -3.62146 -8.082930 -9.108710 -9.552030 0.604587 9.647750 1.56354 -2.98224
## 3 5.59194 0.255314 6.716710 -1.586710 1.778830 -2.208140 -3.38602 -1.42084
## 4 -3.74929 -1.932080 2.643500 3.766340 8.106450 -4.411350 -1.80712 -2.66210
## 5 -6.29215 12.235500 -0.865865 -0.144382 -0.940503 -0.838229 -8.17155 -1.57281
## 6 5.48879 -0.402248 -0.760478 -0.933320 2.795250 -5.700500 7.01591 -2.00649
## PC226 PC227 PC228 PC229 PC230 PC231 PC232
## 1 -5.812820 -0.556436 5.421390 0.973315 4.22507 7.74684 -2.824980
## 2 -8.759420 -5.133450 10.434700 1.852170 -3.21919 -3.28281 3.737270
## 3 5.424080 -2.494230 -0.398656 0.790017 2.30765 5.70697 -2.449790
## 4 1.892400 -2.255280 -10.998000 -4.597690 -15.59370 -10.99590 18.098300
## 5 6.300520 5.356360 -4.260340 1.476340 -9.80437 -3.12196 -4.577790
## 6 0.898544 4.905560 -4.256270 -13.299500 -5.55964 -6.69199 0.910271
## PC233 PC234 PC235 PC236 PC237 PC238 PC239
## 1 13.31080 -14.730400 -10.598300 5.86821 11.89720 9.15428 15.522100
## 2 4.15302 7.857400 6.537100 2.57194 -2.80241 -5.56853 0.433601
## 3 4.56916 -0.185496 -13.465200 1.58770 2.80303 -6.51858 11.355300
## 4 2.44302 -5.491020 21.092000 8.65642 -3.30788 10.82330 3.974270
## 5 -13.60080 9.396310 0.541495 -11.66070 -1.05810 -4.85984 0.621867
## 6 -11.91010 9.942480 7.015510 2.87224 -13.57450 -8.40561 -5.611740
## PC240 PC241 PC242 PC243 PC244 PC245 PC246
## 1 -4.644390 -11.122300 -6.166970 -3.32413 6.832570 -11.75250 -7.66241
## 2 12.428600 0.691729 -0.452836 4.99957 12.638600 12.76320 -8.00939
## 3 0.110384 -3.356570 5.381920 -7.69284 19.911200 -9.56644 -5.50014
## 4 1.485160 1.767310 -10.804000 11.09060 0.416645 -12.45470 -8.62727
## 5 2.528690 -4.375780 -0.512906 7.34698 1.382710 -8.82247 -1.21971
## 6 -1.892380 12.843200 4.894520 2.67951 -10.023300 3.72962 2.72864
## PC247 PC248 PC249 PC250 PC251 PC252 PC253
## 1 -6.937510 -12.83190 1.755680 3.4566200 12.67260 7.218590 -8.65325
## 2 -13.479700 5.93387 12.874600 11.8111000 -3.10706 4.321340 1.47639
## 3 -12.127900 -6.71963 -0.699389 8.0150400 -2.73580 -4.820810 6.34486
## 4 -7.063090 -11.45900 -8.729490 -0.0793172 2.14604 -0.974239 -4.99281
## 5 0.914657 1.14351 -2.689320 0.4449780 2.56107 -3.629580 -8.71877
## 6 0.316696 7.46687 -2.639550 3.7237300 6.38938 13.615600 -15.47010
## PC254 PC255 PC256 PC257 PC258 PC259 PC260
## 1 -6.25543 -6.73511 6.193890 8.329880 -4.816680 0.541965 3.560440
## 2 -2.23967 -3.84316 4.929060 2.167880 -1.366380 -8.534950 17.023800
## 3 -4.47488 5.17743 5.237630 4.352360 8.581240 4.618530 -6.328750
## 4 -2.15612 -1.94788 -2.493940 5.955880 -8.083960 3.160150 7.550890
## 5 -1.76590 8.92968 0.184374 -0.355971 0.469736 4.693120 -2.885510
## 6 -2.66822 -5.84437 -7.129670 -12.423400 2.449930 5.382430 -0.471878
## PC261 PC262 PC263 PC264 PC265 PC266 PC267 PC268
## 1 -4.425650 4.27069 -1.25455 -9.34464 5.559410 6.18695 0.462009 -9.02276
## 2 -2.629560 1.50408 -3.30473 -1.35286 -3.996380 -7.36740 1.449120 2.02989
## 3 4.362500 -4.13551 -2.78760 9.91492 3.298040 -16.04700 2.584200 10.84370
## 4 -0.347105 -2.87329 -3.63635 -2.70680 0.360816 -1.46211 -0.878445 -2.89191
## 5 -2.124490 2.83095 -1.44903 9.42488 0.346431 -9.87365 -3.597790 10.50730
## 6 -2.978680 -1.46830 -6.50351 7.48992 -5.964130 7.05490 -6.785280 3.70866
## PC269 PC270 PC271 PC272 PC273 PC274 PC275
## 1 1.141710 -7.186420 10.57470 -2.68709 3.47205 1.1103100 -0.810875
## 2 1.192420 2.483320 4.30524 6.19651 11.50560 -8.5042200 -1.974170
## 3 1.192010 6.057580 1.61979 7.68589 12.31950 -6.4028600 -1.019940
## 4 -0.576283 0.352734 7.91840 0.83963 3.54216 0.6112570 4.864560
## 5 2.559920 4.747950 -2.79979 8.44632 4.10378 0.0290276 -0.917252
## 6 10.045800 11.496600 -11.87330 4.83425 -14.90620 3.2880400 1.765770
## PC276 PC277 PC278 PC279 PC280 PC281 PC282
## 1 0.790267 3.214050 -2.5700800 -2.826450 -0.366929 4.122520 -2.259730
## 2 0.966456 -0.283686 0.0998602 -1.628590 -7.814720 -5.775000 2.118510
## 3 -4.030300 -2.866380 7.2347400 -2.698500 -4.185240 -0.281134 -1.627250
## 4 -3.583800 0.187923 0.4278200 -2.382940 -2.748640 0.440407 -5.040810
## 5 -3.616090 -6.134920 7.3913300 -0.984962 -2.159430 -0.970610 0.136633
## 6 1.738120 3.473030 -1.9091000 8.148860 2.158070 -2.777400 3.523600
## PC283 PC284 PC285 PC286 PC287 PC288 PC289
## 1 3.672340 1.8835700 5.066380 -2.349720 2.4786700 1.407310 -2.68301
## 2 -5.648640 1.1015000 -1.305600 0.533424 -1.2730100 0.831478 1.60040
## 3 -6.075130 1.7544100 0.249563 0.237337 -6.3643400 -2.454020 1.95367
## 4 0.228679 0.5514890 3.395440 -5.114750 0.0399911 0.445819 1.14122
## 5 -5.147220 -0.0582729 -0.038785 -1.173280 -1.9469400 0.726258 1.32271
## 6 0.387900 0.3683700 1.687890 3.454950 2.4010700 2.583450 -2.11218
## PC290 PC291 PC292 PC293 PC294 PC295 PC296
## 1 -0.753858 -0.542549 -1.997630 0.0354434 1.927740 1.1620000 1.838630
## 2 2.806650 1.957440 -0.168855 -1.2627400 -2.278180 -1.3238000 -1.647870
## 3 1.550630 3.414140 1.327070 -0.4078890 -2.231820 -0.4119370 0.250438
## 4 -3.312840 0.264682 -1.876620 0.4229650 1.819580 -0.0481879 -1.276520
## 5 -2.071300 -0.558831 1.313250 0.5746580 0.503964 0.0674113 -0.216368
## 6 -3.694120 2.995870 0.601978 -1.8855900 -1.583300 -1.1155900 1.513920
## PC297 PC298 PC299 PC300 PC301 PC302 PC303
## 1 -0.140480 -2.214850 -2.31808 -0.703568 -0.113414 -0.574813 -0.676687
## 2 -0.708318 1.880170 2.63775 -2.554080 -0.555429 0.606084 0.957965
## 3 -0.579125 1.810380 2.46392 1.421180 0.275171 0.218699 0.836659
## 4 -0.157423 -1.078660 -2.84694 -0.782049 1.190840 -0.917747 -2.590050
## 5 1.229620 0.976108 2.50623 -0.649626 0.888099 -0.722353 1.189520
## 6 1.612860 -0.807718 2.68794 -0.276612 -2.457360 -0.800217 0.289320
## PC304 PC305 PC306 PC307 PC308 PC309 PC310
## 1 -0.277139 1.008660 -0.378657 0.252205 1.480000 0.308352 0.3872220
## 2 0.984959 0.176792 -0.421508 -1.631460 -1.291760 0.393527 1.3237900
## 3 0.837330 -1.237040 0.667640 -0.916607 0.419896 2.058720 0.8124120
## 4 -0.992738 0.699266 -0.405584 0.334573 1.557710 1.619710 -0.0314967
## 5 1.052450 0.405087 2.204360 -1.001040 -0.162834 1.168010 0.9724400
## 6 0.907985 -0.268888 1.328200 0.103274 -0.802521 0.262558 -0.1756110
## PC311 PC312 PC313 PC314 PC315 PC316 PC317
## 1 -0.889394 -0.255358 -1.340710 -0.3103120 -1.312420 0.76824100 -0.0165334
## 2 -2.533370 0.148725 0.642613 -0.3722790 0.438960 -1.67704000 -1.2035500
## 3 -0.959478 -1.302850 1.367620 0.1548570 -0.390091 0.00345459 2.5224200
## 4 0.226174 -2.424980 1.293800 0.2648420 0.489714 -0.49734800 -1.0871300
## 5 0.335292 0.209168 0.374381 0.0715358 -1.294310 -0.42690300 1.7618000
## 6 -1.175180 -0.360316 -0.409936 -0.3753060 1.115560 0.34592600 -0.1183310
## PC318 PC319 PC320 PC321 PC322 PC323 PC324
## 1 -0.592226 -0.2698480 0.2902790 0.0689809 0.7612440 -0.0592515 0.0208142
## 2 -4.146340 0.3083650 0.2248010 0.1965100 -1.1406300 -0.2998080 0.1335550
## 3 0.516375 0.0640749 0.7191560 -0.2257290 -0.1598330 0.5081930 0.0913785
## 4 -3.886320 0.0446736 -0.4160680 -0.1636460 0.7368040 0.3300800 -0.3511050
## 5 1.589810 -0.0528384 0.7272740 -0.1102090 -0.0970665 0.2105090 -0.0522031
## 6 2.103760 0.9858290 -0.0967412 0.2292060 0.8340250 -0.5315110 0.0766531
## PC325 PC326 PC327 PC328 PC329 Individual
## 1 0.1203100 0.1043010 0.16595600 0.11606000 1.78132e-06 282
## 2 0.5015050 0.4163570 -0.16696300 -0.20619300 1.78132e-06 283
## 3 0.1932900 0.2411500 -0.22998900 0.06923640 1.78132e-06 280
## 4 0.0371669 0.0575980 0.21069900 0.14293000 1.78132e-06 279
## 5 0.0995012 -0.0928798 0.38160600 -0.00189713 1.78132e-06 286
## 6 -0.3880720 -0.6782730 0.00931281 0.01990270 1.78132e-06 287
## Pop_City Location Latitude Longitude Continent Year Region
## 1 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018 Southern Europe
## 2 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018 Southern Europe
## 3 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018 Southern Europe
## 4 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018 Southern Europe
## 5 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018 Southern Europe
## 6 Barcelona Barcelona, Spain 41.3851 2.1734 Europe 2018 Southern Europe
## Subregion order order2 orderold
## 1 West Europe 16 8 8
## 2 West Europe 16 8 8
## 3 West Europe 16 8 8
## 4 West Europe 16 8 8
## 5 West Europe 16 8 8
## 6 West Europe 16 8 8
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_US_pc1_pc2_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
PC 1 & PC 3
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "PCA_lea_iberia_turkey_US_pc1_pc3_b.pdf"
),
width = 8,
height = 8,
units = "in"
)
chartcolors <- c("#66C2A5", "#c41A1C", "goldenrod", "#146c45", "#2524f9", "#a113b2", "#FF7F00")
pch=15
genotype <- here(
"euro_global/output/neuroadmixture/euro_native2_italy_all.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 339
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 339
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BEN BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC KAG
## 12 13 12 14 12 16 2 12 4 7 12 4 11 4 5 9 12 2 6 12
## KAN KAT KLP KUN LAM MAT OKI QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN
## 11 6 4 4 9 12 12 11 4 9 3 12 6 6 7 12 12 9
Convert format
##
## - number of detected individuals: 330
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.geno"
##
## - number of detected individuals: 330
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.removed file, for more informations.
##
##
## - number of detected individuals: 330
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.lfmm"
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1","lea_cross_entropy_euro_native2_italy_all_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9284443 0.8883377 0.8814999 0.8742773 0.8714906 0.8685528 0.8669024
## mean 0.9289609 0.8888186 0.8819074 0.8746872 0.8720530 0.8697568 0.8674704
## max 0.9295439 0.8893572 0.8823791 0.8751120 0.8724608 0.8710534 0.8681971
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8663302 0.8656725 0.8647275 0.8655945 0.8643631 0.8656917 0.8661961
## mean 0.8665036 0.8661961 0.8660245 0.8661466 0.8658581 0.8661260 0.8666861
## max 0.8668618 0.8666665 0.8674751 0.8674410 0.8676213 0.8672100 0.8672014
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8669973 0.8678448 0.8697238 0.8712374 0.8722511 0.8742418 0.8777928
## mean 0.8684575 0.8693895 0.8707579 0.8731435 0.8741718 0.8767588 0.8790701
## max 0.8697920 0.8701229 0.8736439 0.8746222 0.8755863 0.8805344 0.8812548
## K = 22 K = 23 K = 24 K = 25
## min 0.8794246 0.8832928 0.8826137 0.8861262
## mean 0.8824751 0.8852063 0.8854825 0.8883168
## max 0.8848575 0.8913951 0.8885691 0.8898279
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 2 is best for k=10
## K = 10
## run 1 0.8674751
## run 2 0.8647275
## run 3 0.8673379
## run 4 0.8656414
## run 5 0.8649409
# get the cross-entropy of all runs for K = 5
ce5 = cross.entropy(project, K = 5)
ce5 #run 2 is best for k=5
## K = 5
## run 1 0.8724608
## run 2 0.8714906
## run 3 0.8722575
## run 4 0.8722302
## run 5 0.8718258
# get the cross-entropy of all runs for K = 7
ce7 = cross.entropy(project, K = 7)
ce7 #run 2 is best for k=7
## K = 7
## run 1 0.8676905
## run 2 0.8669024
## run 3 0.8675180
## run 4 0.8681971
## run 5 0.8670443
using ggplot
best = which.min(cross.entropy(project, K = 7)) #2
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_7)
Extract ancestry coefficients for K=7
change to correct matrix
leak7 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.snmf/K7/run2/euro_native2_italy_all_r2.7.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak7)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0657 0.0204 0.585 0.0137 0.0746 0.229 0.0116
## 2 0.100 0.0460 0.484 0.0435 0.0729 0.239 0.0147
## 3 0.000100 0.00976 0.986 0.00429 0.000100 0.000100 0.000100
## 4 0.0172 0.000100 0.912 0.0300 0.0407 0.000100 0.000100
## 5 0.0157 0.000100 0.922 0.0412 0.0139 0.00718 0.000100
## 6 0.000100 0.000100 0.999 0.000100 0.000100 0.000100 0.000100
The fam file
fam_file <- here("euro_global/output/neuroadmixture/euro_native2_italy_all.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.065675700 0.020350700 0.585107 0.01372930 0.074562700 0.228981000
## 2 1002 OKI 0.099996700 0.046040100 0.484319 0.04348660 0.072893000 0.238553000
## 3 1003 OKI 0.000099964 0.009755190 0.985557 0.00428777 0.000099964 0.000099964
## 4 1004 OKI 0.017164400 0.000099973 0.911757 0.03002970 0.040749400 0.000099973
## 5 1005 OKI 0.015661800 0.000099982 0.921853 0.04116820 0.013933200 0.007183290
## 6 1006 OKI 0.000099950 0.000099950 0.999400 0.00009995 0.000099950 0.000099950
## X7
## 1 0.011593700
## 2 0.014711700
## 3 0.000099964
## 4 0.000099973
## 5 0.000099982
## 6 0.000099950
Rename the columns
# Rename the columns starting from the third one
leak7 <- leak7 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak7)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.065675700 0.020350700 0.585107 0.01372930 0.074562700 0.228981000
## 2 1002 OKI 0.099996700 0.046040100 0.484319 0.04348660 0.072893000 0.238553000
## 3 1003 OKI 0.000099964 0.009755190 0.985557 0.00428777 0.000099964 0.000099964
## 4 1004 OKI 0.017164400 0.000099973 0.911757 0.03002970 0.040749400 0.000099973
## 5 1005 OKI 0.015661800 0.000099982 0.921853 0.04116820 0.013933200 0.007183290
## 6 1006 OKI 0.000099950 0.000099950 0.999400 0.00009995 0.000099950 0.000099950
## v7
## 1 0.011593700
## 2 0.014711700
## 3 0.000099964
## 4 0.000099973
## 5 0.000099982
## 6 0.000099950
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak7 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_7) +
expand_limits(y = c(0, 1.5))
color_palette_10 <-
c(
"#00FF00",
"#FFFF00",
"#FF0000",
"#0000FF",
"orange",
"purple",
"#1E90FF",
"magenta",
"#75FAFF",
"#F49AC2"
)
using ggplot
best = which.min(cross.entropy(project, K = 10)) #2
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=12.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_10)
Extract ancestry coefficients for k=10
change to correct matrix
leak10 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_italy_all.snmf/K10/run2/euro_native2_italy_all_r2.10.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak10)
## # A tibble: 6 × 10
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.77e-1 1.15e-1 1.00e-4 0.481 2.94e-2 3.71e-2 4.58e-2 1.02e-1 1.10e-2 1.81e-3
## 2 1.63e-1 6.11e-2 1.00e-4 0.358 1.17e-1 7.46e-2 1.53e-2 1.82e-1 1.00e-4 2.89e-2
## 3 9.99e-5 9.99e-5 6.97e-3 0.991 9.99e-5 9.99e-5 1.47e-3 9.99e-5 9.99e-5 9.99e-5
## 4 5.52e-3 3.92e-2 1.00e-4 0.875 1.23e-2 4.73e-2 8.01e-4 1.00e-4 1.00e-4 1.97e-2
## 5 1.76e-3 4.30e-2 1.00e-4 0.895 1.00e-4 4.25e-2 1.00e-4 1.72e-2 1.00e-4 1.00e-4
## 6 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
The fam file
fam_file <- here("euro_global/output/neuroadmixture/euro_native2_italy_all.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 1.77105e-01 1.14984e-01 9.99910e-05 0.481022 2.93972e-02 3.71193e-02
## 2 1002 OKI 1.62571e-01 6.11419e-02 9.99820e-05 0.358093 1.17088e-01 7.46483e-02
## 3 1003 OKI 9.99370e-05 9.99370e-05 6.96579e-03 0.990862 9.99370e-05 9.99370e-05
## 4 1004 OKI 5.52047e-03 3.91880e-02 9.99730e-05 0.874837 1.22991e-02 4.73223e-02
## 5 1005 OKI 1.76204e-03 4.30026e-02 9.99550e-05 0.895062 9.99550e-05 4.24868e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
## X7 X8 X9 X10
## 1 4.58028e-02 1.01648e-01 1.10103e-02 1.81238e-03
## 2 1.53470e-02 1.81973e-01 9.99820e-05 2.89379e-02
## 3 1.47223e-03 9.99370e-05 9.99370e-05 9.99370e-05
## 4 8.01401e-04 9.99730e-05 9.99730e-05 1.97314e-02
## 5 9.99550e-05 1.71871e-02 9.99550e-05 9.99550e-05
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
Rename the columns
# Rename the columns starting from the third one
leak10 <- leak10 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak10)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 1.77105e-01 1.14984e-01 9.99910e-05 0.481022 2.93972e-02 3.71193e-02
## 2 1002 OKI 1.62571e-01 6.11419e-02 9.99820e-05 0.358093 1.17088e-01 7.46483e-02
## 3 1003 OKI 9.99370e-05 9.99370e-05 6.96579e-03 0.990862 9.99370e-05 9.99370e-05
## 4 1004 OKI 5.52047e-03 3.91880e-02 9.99730e-05 0.874837 1.22991e-02 4.73223e-02
## 5 1005 OKI 1.76204e-03 4.30026e-02 9.99550e-05 0.895062 9.99550e-05 4.24868e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
## v7 v8 v9 v10
## 1 4.58028e-02 1.01648e-01 1.10103e-02 1.81238e-03
## 2 1.53470e-02 1.81973e-01 9.99820e-05 2.89379e-02
## 3 1.47223e-03 9.99370e-05 9.99370e-05 9.99370e-05
## 4 8.01401e-04 9.99730e-05 9.99730e-05 1.97314e-02
## 5 9.99550e-05 1.71871e-02 9.99550e-05 9.99550e-05
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak10 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_10) +
expand_limits(y = c(0, 1.5))
ggsave(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_k=10_native_italy_MAF1.pdf"),
width = 12,
height = 6,
units = "in",
device = cairo_pdf
)
Clean env & memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3233303 172.7 5892838 314.8 5892838 314.8
## Vcells 6299688 48.1 68763439 524.7 84253609 642.9
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22642
## column count: 362
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22642
## Character matrix gt cols: 362
## skip: 0
## nrows: 22642
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22642
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BEN BER BRE CAM CES CHA DES GEL HAI HAN HOC HUN IMP INJ INW ITB ITP ITR JAF KAC
## 12 12 13 12 14 12 16 2 12 4 7 12 4 11 4 5 9 12 2 6
## KAG KAN KAT KLP KUN LAM MAT OKI PAL QNC ROM SIC SON SSK SUF SUU TAI TRE UTS YUN
## 12 11 6 4 4 9 12 12 11 11 4 9 3 12 6 6 7 12 12 9
Convert format
##
## - number of detected individuals: 353
## - number of detected loci: 22642
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.geno"
##
## - number of detected individuals: 353
## - number of detected loci: 22642
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.removed file, for more informations.
##
##
## - number of detected individuals: 353
## - number of detected loci: 22642
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.lfmm"
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "native_italy", "lea_cross_entropy_italy_all_and_US_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9268937 0.8877699 0.8813283 0.8744758 0.8719716 0.8693805 0.8675017
## mean 0.9272783 0.8880830 0.8816350 0.8749238 0.8730286 0.8702210 0.8682202
## max 0.9275629 0.8884493 0.8819855 0.8755391 0.8748210 0.8711330 0.8695598
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8665617 0.8660492 0.8652353 0.8653178 0.8640970 0.8651959 0.8661665
## mean 0.8677238 0.8665552 0.8661283 0.8660485 0.8656138 0.8663148 0.8668948
## max 0.8697357 0.8671418 0.8668879 0.8671324 0.8669948 0.8691199 0.8673556
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8661263 0.8674308 0.8684976 0.8693216 0.8712749 0.8715545 0.8731175
## mean 0.8672159 0.8684909 0.8692299 0.8704666 0.8721665 0.8745922 0.8754878
## max 0.8686471 0.8702589 0.8700549 0.8714425 0.8733087 0.8779314 0.8794419
## K = 22 K = 23 K = 24 K = 25
## min 0.8762912 0.8799641 0.8797621 0.8838846
## mean 0.8787773 0.8803735 0.8821449 0.8847680
## max 0.8828910 0.8806781 0.8844514 0.8862852
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 2 is best for k=10
## K = 10
## run 1 0.8668879
## run 2 0.8652353
## run 3 0.8660570
## run 4 0.8664507
## run 5 0.8660108
# get the cross-entropy of all runs for K = 7
ce7 = cross.entropy(project, K = 7)
ce7 #run 2 is best for k=7
## K = 7
## run 1 0.8695598
## run 2 0.8675017
## run 3 0.8678661
## run 4 0.8682642
## run 5 0.8679091
color_palette_7 <-
c(
"#77DD77",
"yellow2",
"magenta",
"#75FAFF",
"orange",
"#1E90FF",
"orangered"
)
using ggplot
best = which.min(cross.entropy(project, K = 7)) #2
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_7)
Extract ancestry coefficients for K=7
change to correct matrix
leak7 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.snmf/K7/run2/dapc_italy_all_and_US_r2.7.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak7)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.601 0.0661 0.00149 0.0456 0.213 0.0388 0.0345
## 2 0.482 0.0982 0.0615 0.0976 0.203 0.000100 0.0576
## 3 0.999 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100
## 4 0.915 0.0145 0.0464 0.0236 0.000100 0.000100 0.000100
## 5 0.927 0.0248 0.0232 0.0244 0.000100 0.000100 0.000100
## 6 0.999 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100
The fam file
fam_file <- here("euro_global/output/dapc/dapc_italy_all_and_US.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.600727 0.06607960 0.00149369 0.04558840 0.212836000 0.038797300
## 2 1002 OKI 0.482055 0.09821330 0.06151460 0.09755160 0.203013000 0.000099991
## 3 1003 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.915228 0.01449530 0.04638100 0.02359630 0.000099973 0.000099973
## 5 1005 OKI 0.927280 0.02484630 0.02316850 0.02440550 0.000099973 0.000099973
## 6 1006 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## X7
## 1 0.034478100
## 2 0.057552800
## 3 0.000099950
## 4 0.000099973
## 5 0.000099973
## 6 0.000099950
Rename the columns
# Rename the columns starting from the third one
leak7 <- leak7 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak7)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.600727 0.06607960 0.00149369 0.04558840 0.212836000 0.038797300
## 2 1002 OKI 0.482055 0.09821330 0.06151460 0.09755160 0.203013000 0.000099991
## 3 1003 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.915228 0.01449530 0.04638100 0.02359630 0.000099973 0.000099973
## 5 1005 OKI 0.927280 0.02484630 0.02316850 0.02440550 0.000099973 0.000099973
## 6 1006 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## v7
## 1 0.034478100
## 2 0.057552800
## 3 0.000099950
## 4 0.000099973
## 5 0.000099973
## 6 0.000099950
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak7 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_7) +
expand_limits(y = c(0, 1.5))
color_palette_10 <-
c(
"#1E90FF",
"#75FAFF",
"orangered",
"#77DD77",
"orange",
"magenta",
"yellow2",
"navy",
"#008080",
"green4"
)
best = which.min(cross.entropy(project, K = 10)) #2
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=10.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_10)
Extract ancestry coefficients for K=10
change to correct matrix
leak10 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/dapc/dapc_italy_all_and_US.snmf/K10/run2/dapc_italy_all_and_US_r2.10.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak10)
## # A tibble: 6 × 10
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.03e-2 3.70e-2 5.68e-2 3.26e-2 1.06e-1 1.37e-3 2.06e-1 0.458 6.21e-2 9.26e-3
## 2 1.00e-4 1.00e-4 1.02e-1 6.74e-2 1.74e-1 1.52e-2 1.60e-1 0.335 1.09e-1 3.62e-2
## 3 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5
## 4 1.00e-4 1.00e-4 1.00e-4 2.34e-2 1.55e-3 5.97e-2 3.82e-2 0.844 3.25e-2 1.00e-4
## 5 1.00e-4 1.40e-3 1.00e-4 1.41e-2 6.60e-3 1.60e-2 2.90e-2 0.907 2.56e-2 1.00e-4
## 6 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5
The fam file
fam_file <- here("euro_global/output/dapc/dapc_italy_all_and_US.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 3.02897e-02 3.70008e-02 5.68101e-02 3.25922e-02 1.06060e-01
## 2 1002 OKI 9.99820e-05 9.99820e-05 1.01823e-01 6.74361e-02 1.74391e-01
## 3 1003 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 9.99640e-05 2.34413e-02 1.54534e-03
## 5 1005 OKI 9.99730e-05 1.40348e-03 9.99730e-05 1.40783e-02 6.59955e-03
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## X6 X7 X8 X9 X10
## 1 1.36729e-03 2.06353e-01 0.458153 6.21146e-02 9.25911e-03
## 2 1.52188e-02 1.60348e-01 0.335293 1.09113e-01 3.61762e-02
## 3 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
## 4 5.97380e-02 3.81962e-02 0.844131 3.25481e-02 9.99640e-05
## 5 1.60403e-02 2.89676e-02 0.906966 2.56453e-02 9.99730e-05
## 6 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
Rename the columns
# Rename the columns starting from the third one
leak10 <- leak10 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak10)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 3.02897e-02 3.70008e-02 5.68101e-02 3.25922e-02 1.06060e-01
## 2 1002 OKI 9.99820e-05 9.99820e-05 1.01823e-01 6.74361e-02 1.74391e-01
## 3 1003 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 9.99640e-05 2.34413e-02 1.54534e-03
## 5 1005 OKI 9.99730e-05 1.40348e-03 9.99730e-05 1.40783e-02 6.59955e-03
## 6 1006 OKI 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## v6 v7 v8 v9 v10
## 1 1.36729e-03 2.06353e-01 0.458153 6.21146e-02 9.25911e-03
## 2 1.52188e-02 1.60348e-01 0.335293 1.09113e-01 3.61762e-02
## 3 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
## 4 5.97380e-02 3.81962e-02 0.844131 3.25481e-02 9.99640e-05
## 5 1.60403e-02 2.89676e-02 0.906966 2.56453e-02 9.99730e-05
## 6 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak10 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_10) +
expand_limits(y = c(0, 1.5))
genotype <- here(
"euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 323
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 323
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALD ALV BEN BER CAM CHA CRO GEL GRA GRC HAI HAN HOC HUN INJ INW JAF KAC KAG KAN
## 10 12 12 12 12 12 12 2 11 10 12 4 7 12 11 4 2 6 12 11
## KAT KLP KUN LAM MAT OKI PAL QNC SON SSK SUF SUU TAI TIR UTS YUN
## 6 4 4 9 12 12 11 11 3 12 6 6 7 4 12 9
Convert format
##
## - number of detected individuals: 314
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.geno"
##
## - number of detected individuals: 314
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.removed file, for more informations.
##
##
## - number of detected individuals: 314
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.lfmm"
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.snmfProject")
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "albania_croatia_greece", "lea_cross_entropy_albania_croatia_greece_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9233611 0.8893582 0.8820397 0.8755814 0.8720341 0.8713425 0.8702904
## mean 0.9236991 0.8898473 0.8829263 0.8762218 0.8727421 0.8721093 0.8713905
## max 0.9241163 0.8903764 0.8838403 0.8767106 0.8732175 0.8728521 0.8723603
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8696857 0.8693687 0.8690723 0.8705174 0.8707420 0.8725734 0.8725764
## mean 0.8707711 0.8700650 0.8704738 0.8710626 0.8714378 0.8731421 0.8742283
## max 0.8711857 0.8707463 0.8714205 0.8720667 0.8722824 0.8742300 0.8748453
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8741498 0.8765791 0.8777430 0.8804020 0.8840067 0.8862442 0.8867222
## mean 0.8761364 0.8777634 0.8796934 0.8815985 0.8853774 0.8887773 0.8904791
## max 0.8778219 0.8801763 0.8822988 0.8834231 0.8865690 0.8908116 0.8925081
## K = 22 K = 23 K = 24 K = 25
## min 0.8912113 0.8957265 0.8983535 0.9041044
## mean 0.8923743 0.8978368 0.9009491 0.9050913
## max 0.8929114 0.8993496 0.9025162 0.9064686
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 5 is best for k=10
## K = 10
## run 1 0.8707718
## run 2 0.8708009
## run 3 0.8703036
## run 4 0.8714205
## run 5 0.8690723
## K = 7
## run 1 0.8720676
## run 2 0.8723603
## run 3 0.8705620
## run 4 0.8716721
## run 5 0.8702904
using ggplot
best = which.min(cross.entropy(project, K = 7)) #2
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_7)
Extract ancestry coefficients for K=7
change to correct matrix
leak7 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.snmf/K7/run5/euro_native2_albania_croatia_greece_US_r5.7.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak7)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0749 0.0949 0.127 0.0358 0.0119 0.00637 0.649
## 2 0.124 0.0895 0.137 0.0723 0.0179 0.000100 0.559
## 3 0.000100 0.000100 0.000100 0.000100 0.00434 0.00426 0.991
## 4 0.0257 0.0349 0.0305 0.0166 0.00141 0.00607 0.885
## 5 0.0183 0.000100 0.0578 0.000100 0.000100 0.000100 0.923
## 6 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100 0.999
The fam file
fam_file <- here("euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 0.074938700 0.094872400 0.126692000 0.035842500 0.011863100
## 2 1002 OKI 0.124185000 0.089502600 0.137391000 0.072307900 0.017885400
## 3 1003 OKI 0.000099964 0.000099964 0.000099964 0.000099964 0.004338800
## 4 1004 OKI 0.025675400 0.034944100 0.030518100 0.016572200 0.001405040
## 5 1005 OKI 0.018338000 0.000099964 0.057766800 0.000099964 0.000099964
## 6 1006 OKI 0.000099950 0.000099950 0.000099950 0.000099950 0.000099950
## X6 X7
## 1 0.006368430 0.649423
## 2 0.000099991 0.558628
## 3 0.004255540 0.991006
## 4 0.006070940 0.884814
## 5 0.000099964 0.923495
## 6 0.000099950 0.999400
Rename the columns
# Rename the columns starting from the third one
leak7 <- leak7 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak7)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 0.074938700 0.094872400 0.126692000 0.035842500 0.011863100
## 2 1002 OKI 0.124185000 0.089502600 0.137391000 0.072307900 0.017885400
## 3 1003 OKI 0.000099964 0.000099964 0.000099964 0.000099964 0.004338800
## 4 1004 OKI 0.025675400 0.034944100 0.030518100 0.016572200 0.001405040
## 5 1005 OKI 0.018338000 0.000099964 0.057766800 0.000099964 0.000099964
## 6 1006 OKI 0.000099950 0.000099950 0.000099950 0.000099950 0.000099950
## v6 v7
## 1 0.006368430 0.649423
## 2 0.000099991 0.558628
## 3 0.004255540 0.991006
## 4 0.006070940 0.884814
## 5 0.000099964 0.923495
## 6 0.000099950 0.999400
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak7 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_7) +
expand_limits(y = c(0, 1.5))
color_palette_10 <-
c(
"#75FAFF",
"navy",
"orangered",
"yellow2",
"#FFFF99",
"green",
"magenta",
"orange",
"#008080",
"goldenrod"
)
using ggplot
best = which.min(cross.entropy(project, K = 10)) #5
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=10.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_10)
Extract ancestry coefficients for k=10
change to correct matrix
leak10 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.snmf/K10/run5/euro_native2_albania_croatia_greece_US_r5.10.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak10)
## # A tibble: 6 × 10
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2.35e-2 8.97e-2 0.484 4.88e-2 3.95e-2 2.54e-2 6.44e-3 1.26e-1 1.23e-2 1.44e-1
## 2 5.29e-2 8.05e-2 0.328 7.14e-2 7.53e-2 1.00e-4 6.83e-3 1.89e-1 9.41e-3 1.86e-1
## 3 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4 1.00e-4 1.00e-4 0.847 3.33e-2 1.09e-2 4.57e-2 1.00e-4 5.68e-2 5.79e-3 1.00e-4
## 5 8.97e-3 1.00e-4 0.884 2.42e-2 1.00e-4 1.00e-4 1.00e-4 3.45e-2 1.00e-4 4.74e-2
## 6 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
The fam file
fam_file <- here("euro_global/output/neuroadmixture/euro_native2_albania_croatia_greece_US.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 2.35101e-02 8.97365e-02 0.483871 4.88332e-02 3.95461e-02 2.54034e-02
## 2 1002 OKI 5.28657e-02 8.04670e-02 0.328388 7.14030e-02 7.52950e-02 9.99910e-05
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 0.847077 3.33007e-02 1.09430e-02 4.57350e-02
## 5 1005 OKI 8.96517e-03 9.99550e-05 0.884467 2.41564e-02 9.99550e-05 9.99550e-05
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## X7 X8 X9 X10
## 1 6.44432e-03 1.26041e-01 1.23340e-02 1.44280e-01
## 2 6.82661e-03 1.89227e-01 9.40816e-03 1.86019e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99640e-05 5.67593e-02 5.78536e-03 9.99640e-05
## 5 9.99550e-05 3.45474e-02 9.99550e-05 4.73644e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
Rename the columns
# Rename the columns starting from the third one
leak10 <- leak10 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak10)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 2.35101e-02 8.97365e-02 0.483871 4.88332e-02 3.95461e-02 2.54034e-02
## 2 1002 OKI 5.28657e-02 8.04670e-02 0.328388 7.14030e-02 7.52950e-02 9.99910e-05
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 9.99640e-05 9.99640e-05 0.847077 3.33007e-02 1.09430e-02 4.57350e-02
## 5 1005 OKI 8.96517e-03 9.99550e-05 0.884467 2.41564e-02 9.99550e-05 9.99550e-05
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## v7 v8 v9 v10
## 1 6.44432e-03 1.26041e-01 1.23340e-02 1.44280e-01
## 2 6.82661e-03 1.89227e-01 9.40816e-03 1.86019e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99640e-05 5.67593e-02 5.78536e-03 9.99640e-05
## 5 9.99550e-05 3.45474e-02 9.99550e-05 4.73644e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak10 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_10) +
expand_limits(y = c(0, 1.5))
genotype <- here(
"euro_global/output/neuroadmixture/native_far_east_euro2.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 347
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 347
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALU ARM BEN CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KER KLP
## 12 10 12 12 12 2 12 12 4 7 12 11 4 2 6 12 11 6 12 4
## KRA KUN LAM MAT OKI QNC RAR SEV SOC SON SSK SUF SUU TAI TIK UTS YUN
## 12 4 9 12 12 11 12 12 12 3 12 6 6 7 12 12 9
Convert format
##
## - number of detected individuals: 338
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.geno"
##
## - number of detected individuals: 338
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.removed file, for more informations.
##
##
## - number of detected individuals: 338
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.lfmm"
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "eastern_europe", "lea_cross_entropy_eastern_europe_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9217190 0.8821233 0.8717093 0.8645070 0.8610212 0.8604267 0.8590902
## mean 0.9222289 0.8827113 0.8721769 0.8650111 0.8616927 0.8609514 0.8601843
## max 0.9230844 0.8835033 0.8728852 0.8657793 0.8625686 0.8616257 0.8613609
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8589110 0.8585290 0.8581418 0.8579527 0.8583874 0.8592648 0.8591720
## mean 0.8594724 0.8591332 0.8587629 0.8587118 0.8590905 0.8594949 0.8600420
## max 0.8599754 0.8603889 0.8594127 0.8591350 0.8595783 0.8598640 0.8611707
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8596772 0.8612203 0.8625132 0.8631991 0.8659968 0.8663554 0.8683850
## mean 0.8607397 0.8614333 0.8637799 0.8649254 0.8677074 0.8693006 0.8711266
## max 0.8623412 0.8618979 0.8647681 0.8676732 0.8691997 0.8711685 0.8740780
## K = 22 K = 23 K = 24 K = 25
## min 0.8697842 0.8725891 0.8741687 0.8757722
## mean 0.8735215 0.8756872 0.8763341 0.8776887
## max 0.8754081 0.8779592 0.8797642 0.8809103
# get the cross-entropy of all runs for K = 10
ce10 = cross.entropy(project, K = 10)
ce10 #run 3 is best for k=10
## K = 10
## run 1 0.8586326
## run 2 0.8588221
## run 3 0.8581418
## run 4 0.8588052
## run 5 0.8594127
## K = 6
## run 1 0.8616257
## run 2 0.8611494
## run 3 0.8604267
## run 4 0.8606753
## run 5 0.8608797
## K = 7
## run 1 0.8603667
## run 2 0.8613609
## run 3 0.8602921
## run 4 0.8590902
## run 5 0.8598116
using ggplot
best = which.min(cross.entropy(project, K = 7)) #2
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_7)
Extract ancestry coefficients for k=7
change to correct matrix
leak7 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.snmf/K7/run4/native_far_east_euro2_r4.7.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak7)
## # A tibble: 6 × 7
## X1 X2 X3 X4 X5 X6 X7
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0228 0.133 0.570 0.0853 0.0902 0.0324 0.0673
## 2 0.0793 0.238 0.385 0.139 0.0208 0.0228 0.114
## 3 0.000100 0.000100 0.999 0.000100 0.000100 0.000100 0.000100
## 4 0.0117 0.0127 0.890 0.0480 0.000100 0.000100 0.0372
## 5 0.000100 0.0364 0.905 0.0137 0.0305 0.000100 0.0145
## 6 0.000100 0.000100 0.999 0.000100 0.000100 0.000100 0.000100
The fam file
fam_file <- here("euro_global/output/neuroadmixture/native_far_east_euro2.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.022762300 0.13252800 0.569580 0.08532410 0.090171400 0.032372900
## 2 1002 OKI 0.079327200 0.23798600 0.385303 0.13944400 0.020824700 0.022758000
## 3 1003 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.011698200 0.01266560 0.890219 0.04800090 0.000099982 0.000099982
## 5 1005 OKI 0.000099982 0.03638430 0.904681 0.01372770 0.030542900 0.000099982
## 6 1006 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
## X7
## 1 0.06726200
## 2 0.11435800
## 3 0.00009995
## 4 0.03721680
## 5 0.01446400
## 6 0.00009995
Rename the columns
# Rename the columns starting from the third one
leak7 <- leak7 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak7)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.022762300 0.13252800 0.569580 0.08532410 0.090171400 0.032372900
## 2 1002 OKI 0.079327200 0.23798600 0.385303 0.13944400 0.020824700 0.022758000
## 3 1003 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
## 4 1004 OKI 0.011698200 0.01266560 0.890219 0.04800090 0.000099982 0.000099982
## 5 1005 OKI 0.000099982 0.03638430 0.904681 0.01372770 0.030542900 0.000099982
## 6 1006 OKI 0.000099950 0.00009995 0.999400 0.00009995 0.000099950 0.000099950
## v7
## 1 0.06726200
## 2 0.11435800
## 3 0.00009995
## 4 0.03721680
## 5 0.01446400
## 6 0.00009995
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak7 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:7)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_7[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_7) +
expand_limits(y = c(0, 1.5))
using ggplot
best = which.min(cross.entropy(project, K = 6)) #3
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 6, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:6)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_6[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=6.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_6)
Extract ancestry coefficients for K=6
change to correct matrix
leak6 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.snmf/K6/run3/native_far_east_euro2_r3.6.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak6)
## # A tibble: 6 × 6
## X1 X2 X3 X4 X5 X6
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0884 0.0378 0.104 0.0444 0.0536 0.671
## 2 0.0801 0.0356 0.142 0.0504 0.110 0.583
## 3 0.00626 0.000100 0.000100 0.000100 0.000100 0.993
## 4 0.0231 0.000100 0.0268 0.00188 0.0269 0.921
## 5 0.0367 0.000100 0.0161 0.000100 0.000100 0.947
## 6 0.000100 0.000100 0.000100 0.000100 0.000100 1.00
The fam file
fam_file <- here("euro_global/output/neuroadmixture/native_far_east_euro2.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.08835020 0.037768300 0.104384000 0.044442500 0.053568700 0.671486
## 2 1002 OKI 0.08007590 0.035595900 0.141756000 0.050354400 0.109508000 0.582710
## 3 1003 OKI 0.00625699 0.000099964 0.000099964 0.000099964 0.000099964 0.993343
## 4 1004 OKI 0.02305960 0.000099991 0.026848700 0.001880900 0.026911500 0.921199
## 5 1005 OKI 0.03666370 0.000099973 0.016109600 0.000099973 0.000099973 0.946927
## 6 1006 OKI 0.00009996 0.000099960 0.000099960 0.000099960 0.000099960 0.999500
Rename the columns
# Rename the columns starting from the third one
leak6 <- leak6 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak6)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.08835020 0.037768300 0.104384000 0.044442500 0.053568700 0.671486
## 2 1002 OKI 0.08007590 0.035595900 0.141756000 0.050354400 0.109508000 0.582710
## 3 1003 OKI 0.00625699 0.000099964 0.000099964 0.000099964 0.000099964 0.993343
## 4 1004 OKI 0.02305960 0.000099991 0.026848700 0.001880900 0.026911500 0.921199
## 5 1005 OKI 0.03666370 0.000099973 0.016109600 0.000099973 0.000099973 0.946927
## 6 1006 OKI 0.00009996 0.000099960 0.000099960 0.000099960 0.000099960 0.999500
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak6 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:6)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_6[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=6.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_6) +
expand_limits(y = c(0, 1.5))
color_palette_10 <-
c(
"#75FAFF",
"navy",
"orangered",
"yellow2",
"#FFFF99",
"purple",
"magenta",
"purple4",
"green4",
"#B20CC9"
)
using ggplot
best = which.min(cross.entropy(project, K = 10)) #3
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 10, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=10.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_10)
Extract ancestry coefficients for K=10
change to correct matrix
leak10 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_far_east_euro2.snmf/K10/run3/native_far_east_euro2_r3.10.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak10)
## # A tibble: 6 × 10
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.00e-4 5.05e-2 0.523 3.65e-2 1.61e-1 2.27e-2 5.89e-2 1.49e-3 3.57e-2 1.10e-1
## 2 8.99e-2 1.80e-2 0.375 2.92e-2 1.40e-1 5.09e-2 4.42e-2 5.06e-2 2.04e-2 1.82e-1
## 3 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4 8.98e-3 1.41e-2 0.857 1.00e-4 6.82e-2 1.45e-2 1.00e-4 3.35e-3 1.00e-4 3.39e-2
## 5 1.00e-4 1.28e-2 0.888 1.00e-4 3.48e-2 1.27e-2 1.00e-4 3.45e-3 1.55e-2 3.26e-2
## 6 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
The fam file
fam_file <- here("euro_global/output/neuroadmixture/native_far_east_euro2.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 9.99910e-05 5.05316e-02 0.522522 3.65307e-02 1.61025e-01 2.27072e-02
## 2 1002 OKI 8.98890e-02 1.79684e-02 0.375147 2.92204e-02 1.40089e-01 5.09065e-02
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 8.97959e-03 1.41323e-02 0.856627 9.99730e-05 6.82032e-02 1.44932e-02
## 5 1005 OKI 9.99730e-05 1.28261e-02 0.887890 9.99730e-05 3.48179e-02 1.26841e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## X7 X8 X9 X10
## 1 5.89181e-02 1.49097e-03 3.57039e-02 1.10470e-01
## 2 4.41989e-02 5.06259e-02 2.04483e-02 1.81507e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99730e-05 3.35282e-03 9.99730e-05 3.39115e-02
## 5 9.99730e-05 3.45216e-03 1.54640e-02 3.25658e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
Rename the columns
# Rename the columns starting from the third one
leak10 <- leak10 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak10)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 9.99910e-05 5.05316e-02 0.522522 3.65307e-02 1.61025e-01 2.27072e-02
## 2 1002 OKI 8.98890e-02 1.79684e-02 0.375147 2.92204e-02 1.40089e-01 5.09065e-02
## 3 1003 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## 4 1004 OKI 8.97959e-03 1.41323e-02 0.856627 9.99730e-05 6.82032e-02 1.44932e-02
## 5 1005 OKI 9.99730e-05 1.28261e-02 0.887890 9.99730e-05 3.48179e-02 1.26841e-02
## 6 1006 OKI 9.99201e-05 9.99201e-05 0.999101 9.99201e-05 9.99201e-05 9.99201e-05
## v7 v8 v9 v10
## 1 5.89181e-02 1.49097e-03 3.57039e-02 1.10470e-01
## 2 4.41989e-02 5.06259e-02 2.04483e-02 1.81507e-01
## 3 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
## 4 9.99730e-05 3.35282e-03 9.99730e-05 3.39115e-02
## 5 9.99730e-05 3.45216e-03 1.54640e-02 3.25658e-02
## 6 9.99201e-05 9.99201e-05 9.99201e-05 9.99201e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak10 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:10)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_10[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=10.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_10) +
expand_limits(y = c(0, 1.5))
echo "BER
PAL
POL
POP
SPB
BAR
SPC
SPM
SPS
TUA
TUH
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_turkey_iberia_US.txt
Turkey+iberia+US
cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_turkey_iberia_US.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/native_turkey_iberia_US2 \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/native_turkey_iberia_US2.log
100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.969253. 22537 variants and 329 people pass filters and QC.
genotype <- here(
"euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 338
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 338
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## BAR BEN BER CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM
## 12 12 12 12 12 2 12 4 7 12 11 4 2 6 12 11 6 4 4 9
## MAT OKI PAL POL POP QNC SON SPB SPC SPM SPS SSK SUF SUU TAI TUA TUH UTS YUN
## 12 12 11 2 12 11 3 8 6 5 8 12 6 6 7 9 12 12 9
Convert format
##
## - number of detected individuals: 329
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.geno"
##
## - number of detected individuals: 329
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.removed file, for more informations.
##
##
## - number of detected individuals: 329
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.lfmm"
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
project = load.snmfProject("euro_global/output/neuroadmixture/native_turkey_iberia_US2.snmfProject")
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "iberia_turkey", "lea_cross_entropy_iberia_turkey_US_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9271062 0.8900207 0.8839852 0.8765154 0.8723017 0.8691442 0.8669760
## mean 0.9278533 0.8907388 0.8847615 0.8772518 0.8728473 0.8703742 0.8679693
## max 0.9282251 0.8910604 0.8852040 0.8776148 0.8734732 0.8716343 0.8694265
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8656042 0.8649707 0.8649777 0.8639331 0.8642309 0.8657547 0.8653990
## mean 0.8675944 0.8658563 0.8654312 0.8655895 0.8653379 0.8660151 0.8673528
## max 0.8690673 0.8662602 0.8660101 0.8667388 0.8661961 0.8667623 0.8693168
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8669662 0.8669479 0.8687847 0.8714309 0.8723204 0.8731121 0.8753858
## mean 0.8674553 0.8696696 0.8697166 0.8723978 0.8736355 0.8748723 0.8772996
## max 0.8679922 0.8726039 0.8707179 0.8729233 0.8747222 0.8776899 0.8784752
## K = 22 K = 23 K = 24 K = 25
## min 0.8789713 0.8811850 0.8838420 0.8849231
## mean 0.8796446 0.8819028 0.8851866 0.8877920
## max 0.8811388 0.8839430 0.8862658 0.8898421
# get the cross-entropy of all runs for K = 11
ce11 = cross.entropy(project, K = 11)
ce11 #run 3 is best for k=11
## K = 11
## run 1 0.8664886
## run 2 0.8647956
## run 3 0.8639331
## run 4 0.8667388
## run 5 0.8659916
color_palette_11 <-
c(
"#75FAFF",
"navy",
"orangered",
"yellow2",
"#FFFF99",
"blue",
"#B22222",
"#F49AC2",
"#008080",
"orange",
"magenta"
)
using ggplot
best = which.min(cross.entropy(project, K = 11)) #3
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 11, run = best))
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:11)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_11[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=11.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_11)
Extract ancestry coefficients for k=11
change to correct matrix
leak11 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.snmf/K11/run3/native_turkey_iberia_US2_r3.11.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak11)
## # A tibble: 6 × 11
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5.84e-2 1.75e-2 0.538 2.48e-2 1.32e-1 4.00e-2 1.48e-1 7.68e-3 3.02e-2 9.80e-4
## 2 1.22e-1 9.77e-2 0.316 1.00e-4 4.79e-2 1.67e-1 1.65e-1 1.28e-2 2.72e-2 3.90e-2
## 3 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4 1.00e-4 7.00e-2 0.885 1.26e-2 7.66e-3 1.00e-4 1.00e-4 1.00e-4 6.89e-3 1.27e-2
## 5 9.99e-5 6.56e-2 0.911 9.99e-5 9.99e-5 9.99e-5 1.15e-2 9.99e-5 6.70e-3 4.35e-3
## 6 9.99e-5 9.99e-5 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## # ℹ 1 more variable: X11 <dbl>
The fam file
fam_file <- here("euro_global/output/neuroadmixture/native_turkey_iberia_US2.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 5.84230e-02 1.74758e-02 0.538229 2.48206e-02 1.31607e-01 3.99854e-02
## 2 1002 OKI 1.22436e-01 9.76610e-02 0.316025 9.99910e-05 4.78928e-02 1.67050e-01
## 3 1003 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
## 4 1004 OKI 9.99713e-05 6.99730e-02 0.885188 1.26074e-02 7.65750e-03 9.99713e-05
## 5 1005 OKI 9.99460e-05 6.55608e-02 0.911314 9.99460e-05 9.99460e-05 9.99460e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
## X7 X8 X9 X10 X11
## 1 1.48209e-01 7.67503e-03 3.02475e-02 9.79923e-04 2.34667e-03
## 2 1.65019e-01 1.27839e-02 2.71506e-02 3.90228e-02 4.85968e-03
## 3 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
## 4 9.99713e-05 9.99713e-05 6.88714e-03 1.27036e-02 4.58366e-03
## 5 1.14668e-02 9.99460e-05 6.70434e-03 4.35455e-03 9.99460e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
Rename the columns
# Rename the columns starting from the third one
leak11 <- leak11 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak11)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 5.84230e-02 1.74758e-02 0.538229 2.48206e-02 1.31607e-01 3.99854e-02
## 2 1002 OKI 1.22436e-01 9.76610e-02 0.316025 9.99910e-05 4.78928e-02 1.67050e-01
## 3 1003 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
## 4 1004 OKI 9.99713e-05 6.99730e-02 0.885188 1.26074e-02 7.65750e-03 9.99713e-05
## 5 1005 OKI 9.99460e-05 6.55608e-02 0.911314 9.99460e-05 9.99460e-05 9.99460e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 0.999001 9.99101e-05 9.99101e-05 9.99101e-05
## v7 v8 v9 v10 v11
## 1 1.48209e-01 7.67503e-03 3.02475e-02 9.79923e-04 2.34667e-03
## 2 1.65019e-01 1.27839e-02 2.71506e-02 3.90228e-02 4.85968e-03
## 3 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
## 4 9.99713e-05 9.99713e-05 6.88714e-03 1.27036e-02 4.58366e-03
## 5 1.14668e-02 9.99460e-05 6.70434e-03 4.35455e-03 9.99460e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak11 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:11)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_11[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=11.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_11) +
expand_limits(y = c(0, 1.5))
color_palette_9 <-
c(
"#75FAFF",
"orangered",
"yellow2",
"#FFFF99",
"blue",
"#B22222",
"#F49AC2",
"magenta",
"navy"
)
## K = 9
## run 1 0.8660490
## run 2 0.8660339
## run 3 0.8649707
## run 4 0.8662602
## run 5 0.8659677
using ggplot
best = which.min(cross.entropy(project, K = 9)) #3
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 9, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:9)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_9[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=9.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_9)
color_palette_9 <-
c(
"#F49AC2",
"#75FAFF",
"#B22222",
"blue",
"yellow2",
"green4",
"magenta",
"#FFFF99",
"orangered")
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=9.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_9)
ggsave(
here("scripts", "RMarkdowns",
"output", "euro_global", "lea", "MAF_1", "iberia_turkey", "LEA_admixture_by_country _iberia_turkey_US_k9_r01_MAF1.pdf"
),
width = 10,
height = 7,
units = "in"
)
Extract ancestry coefficients for K=9
change to correct matrix
leak9 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_turkey_iberia_US2.snmf/K9/run3/native_turkey_iberia_US2_r3.9.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak9)
## # A tibble: 6 × 9
## X1 X2 X3 X4 X5 X6 X7 X8 X9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0261 0.0150 0.0865 0.142 0.541 1.25e-2 4.78e-2 1.23e-1 5.26e-3
## 2 0.0256 0.0158 0.187 0.202 0.308 5.33e-2 1.09e-1 9.91e-2 1.00e-4
## 3 0.0000999 0.0000999 0.0000999 0.0000999 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5
## 4 0.000100 0.0110 0.000100 0.0118 0.908 1.50e-2 6.95e-3 4.05e-2 6.96e-3
## 5 0.000100 0.0235 0.000100 0.0296 0.912 5.80e-3 1.00e-4 2.84e-2 1.00e-4
## 6 0.0000999 0.0000999 0.0000999 0.0000999 0.999 9.99e-5 9.99e-5 9.99e-5 9.99e-5
The fam file
fam_file <- here("euro_global/output/neuroadmixture/native_turkey_iberia_US2.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5 X6
## 1 1001 OKI 0.026078400 0.01504910 0.086530300 0.14187700 0.541452 0.01254240
## 2 1002 OKI 0.025618700 0.01577310 0.187377000 0.20171500 0.307703 0.05328930
## 3 1003 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
## 4 1004 OKI 0.000099982 0.01102460 0.000099982 0.01177900 0.907593 0.01501460
## 5 1005 OKI 0.000099964 0.02347530 0.000099964 0.02963310 0.912340 0.00580212
## 6 1006 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
## X7 X8 X9
## 1 0.047774900 0.12343600 0.005260830
## 2 0.109337000 0.09908680 0.000099991
## 3 0.000099930 0.00009993 0.000099930
## 4 0.006948470 0.04048100 0.006959580
## 5 0.000099964 0.02835000 0.000099964
## 6 0.000099930 0.00009993 0.000099930
Rename the columns
# Rename the columns starting from the third one
leak9 <- leak9 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak9)
## ind pop v1 v2 v3 v4 v5 v6
## 1 1001 OKI 0.026078400 0.01504910 0.086530300 0.14187700 0.541452 0.01254240
## 2 1002 OKI 0.025618700 0.01577310 0.187377000 0.20171500 0.307703 0.05328930
## 3 1003 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
## 4 1004 OKI 0.000099982 0.01102460 0.000099982 0.01177900 0.907593 0.01501460
## 5 1005 OKI 0.000099964 0.02347530 0.000099964 0.02963310 0.912340 0.00580212
## 6 1006 OKI 0.000099930 0.00009993 0.000099930 0.00009993 0.999201 0.00009993
## v7 v8 v9
## 1 0.047774900 0.12343600 0.005260830
## 2 0.109337000 0.09908680 0.000099991
## 3 0.000099930 0.00009993 0.000099930
## 4 0.006948470 0.04048100 0.006959580
## 5 0.000099964 0.02835000 0.000099964
## 6 0.000099930 0.00009993 0.000099930
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak9 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:9)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_9[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=9.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_9) +
expand_limits(y = c(0, 1.5))
Create new file native_eastern_europe
echo "
SER
BUL
ROS
TUA
TUH
SEV
ALU
KER
KRA
TIK
RAR
SOC
GES
ARM
HAI
YUN
HUN
OKI
KAN
UTS
KAG
TAI
GEL
BEN
SUF
INW
KLP
KUN
KAT
JAF
CAM
SUU
INJ
MAT
SSK
KAC
SON
CHA
LAM
HAN
HOC
QNC
" > euro_global/output/neuroadmixture/native_eastern_europe_b.txt
Turkey+iberia+US
cd /gpfs/gibbs/pi/caccone/mkc54/albo
plink \
--keep-allele-order \
--keep-fam euro_global/output/neuroadmixture/native_eastern_europe_b.txt \
--bfile euro_global/output/file7b \
--make-bed \
--export vcf \
--out euro_global/output/neuroadmixture/native_eastern_europe_b \
--extract euro_global/output/neuroadmixture/train/train_euro_nativeb.snplist \
--silent
grep "samples\|variants" euro_global/output/neuroadmixture/native_eastern_europe_b.log
100367 variants loaded from .bim file. –extract: 22537 variants remaining. Total genotyping rate in remaining samples is 0.970557. 22537 variants and 384 people pass filters and QC.
genotype <- here(
"euro_global/output/neuroadmixture/native_eastern_europe_b.vcf"
)
d <- read.vcfR(
genotype
)
## Scanning file to determine attributes.
## File attributes:
## meta lines: 8
## header_line: 9
## variant count: 22537
## column count: 393
##
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 22537
## Character matrix gt cols: 393
## skip: 0
## nrows: 22537
## row_num: 0
##
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant: 22537
## All variants processed
Get population and individuals information
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1]))))
table(pops)
## pops
## ALU ARM BEN BUL CAM CHA GEL GES HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KER
## 12 10 12 10 12 12 2 12 12 4 7 12 11 4 2 6 12 11 6 12
## KLP KRA KUN LAM MAT OKI QNC RAR ROS SER SEV SOC SON SSK SUF SUU TAI TIK TUA TUH
## 4 12 4 9 12 12 11 12 11 4 12 12 3 12 6 6 7 12 9 12
## UTS YUN
## 12 9
Convert format
##
## - number of detected individuals: 384
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.removed file, for more informations.
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.geno"
##
## - number of detected individuals: 384
## - number of detected loci: 22537
##
## For SNP info, please check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.vcfsnp.
##
## 0 line(s) were removed because these are not SNPs.
## Please, check /gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.removed file, for more informations.
##
##
## - number of detected individuals: 384
## - number of detected loci: 22537
## [1] "/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.lfmm"
Sample data
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
We will do 5 repetitions
# set output dir
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
genotype,
K = 1:25,
project = "new",
repetitions = 5,
percentage = 0.25,
iterations = 500,
CPU = 10,
entropy = TRUE
)
Cross entropy
# Open a new pdf file
pdf(here("scripts", "RMarkdowns", "output","euro_global","lea","MAF_1", "eastern_europe", "lea_cross_entropy_eastern_europe_all_r01_b.pdf"), width = 6, height = 4)
# Create your plot
plot(project, col = "blue", pch = 19, cex = 1.2)
# Close the pdf file
dev.off()
## png
## 2
Summary of project
check with run is best
## $repetitions
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7 K = 8 K = 9
## with cross-entropy 5 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5 5
## K = 10 K = 11 K = 12 K = 13 K = 14 K = 15 K = 16 K = 17
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
## K = 18 K = 19 K = 20 K = 21 K = 22 K = 23 K = 24 K = 25
## with cross-entropy 5 5 5 5 5 5 5 5
## without cross-entropy 0 0 0 0 0 0 0 0
## total 5 5 5 5 5 5 5 5
##
## $crossEntropy
## K = 1 K = 2 K = 3 K = 4 K = 5 K = 6 K = 7
## min 0.9263358 0.8900728 0.8784947 0.8723280 0.8692091 0.8675548 0.8667524
## mean 0.9266675 0.8904538 0.8788574 0.8726525 0.8695476 0.8679456 0.8671573
## max 0.9270804 0.8909701 0.8793918 0.8731650 0.8699389 0.8684354 0.8679222
## K = 8 K = 9 K = 10 K = 11 K = 12 K = 13 K = 14
## min 0.8659163 0.8653125 0.8656195 0.8645445 0.8643073 0.8639835 0.8644666
## mean 0.8665982 0.8659715 0.8660902 0.8648374 0.8650374 0.8654291 0.8656613
## max 0.8674655 0.8666695 0.8664496 0.8650090 0.8658513 0.8666653 0.8663259
## K = 15 K = 16 K = 17 K = 18 K = 19 K = 20 K = 21
## min 0.8647958 0.8647558 0.8666941 0.8662315 0.8665095 0.8679754 0.8683581
## mean 0.8655229 0.8660045 0.8669962 0.8666929 0.8676454 0.8689586 0.8695316
## max 0.8660169 0.8667583 0.8674466 0.8669997 0.8687801 0.8694516 0.8719143
## K = 22 K = 23 K = 24 K = 25
## min 0.8691795 0.8695068 0.8709611 0.8722640
## mean 0.8707714 0.8715768 0.8732896 0.8734511
## max 0.8727627 0.8742344 0.8764915 0.8741712
# get the cross-entropy of all runs for K = 11
ce11 = cross.entropy(project, K = 11)
ce11 #run 1 is best for k=11
## K = 11
## run 1 0.8645445
## run 2 0.8649721
## run 3 0.8650090
## run 4 0.8649443
## run 5 0.8647170
color_palette_11 <-
c(
"#75FAFF",
"navy",
"orangered",
"yellow2",
"#FFFF99",
"purple",
"magenta",
"purple4",
"green4",
"#B20CC9",
"orange"
)
using ggplot
best = which.min(cross.entropy(project, K = 11)) #3
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
library(reshape2)
# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 11, run = best))
# Create a named vector to map countries to regions
# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops
# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
mutate(Region_Country = interaction(Region, Country, sep = "_"))
# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
arrange(Region, Country) |>
mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))
# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(Region_Country, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))
# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5 # Shift borders to the right edge of the bars
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(Region_Country) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# source the plotting function
source(
here("scripts", "RMarkdowns",
"analyses", "my_theme2.R"
)
)
# Generate all potential variable names
all_variables <- paste0("V", 1:11)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_11[1:length(all_variables)])
# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") + # Add borders
my_theme() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none") + # Hide legend
xlab("") + # Suppress x-axis label
ylab("Ancestry proportions") +
ggtitle("Ancestry matrix") +
labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=11.") +
# scale_fill_manual(values = color) +
scale_x_discrete(labels = function(x) gsub(".*_", "", x)) + # Remove Region prefix from labels
scale_fill_manual(values = color_palette_11)
Extract ancestry coefficients for K=11
change to correct matrix
leak11 <- read_delim(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/euro_global/output/neuroadmixture/native_eastern_europe_b.snmf/K11/run1/native_eastern_europe_b_r1.11.Q"),
delim = " ", # Specify the delimiter if different from the default (comma)
col_names = FALSE,
show_col_types = FALSE
)
head(leak11)
## # A tibble: 6 × 11
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.00e-4 2.78e-2 2.00e-2 1.01e-1 8.08e-2 7.20e-2 2.86e-2 4.80e-2 2.91e-2 0.496
## 2 3.60e-2 5.85e-3 2.81e-2 1.06e-1 3.51e-2 1.09e-1 2.12e-2 8.85e-2 6.43e-2 0.350
## 3 9.99e-5 9.99e-5 1.23e-2 9.99e-5 1.89e-3 9.99e-5 9.68e-4 9.99e-5 9.99e-5 0.981
## 4 1.68e-2 2.10e-2 1.00e-4 9.30e-4 1.00e-4 7.12e-2 1.00e-4 1.00e-4 1.29e-2 0.872
## 5 1.41e-2 1.00e-4 3.81e-3 1.77e-2 1.00e-4 4.01e-2 1.00e-4 3.13e-3 9.52e-3 0.911
## 6 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 9.99e-5 0.999
## # ℹ 1 more variable: X11 <dbl>
The fam file
fam_file <- here("euro_global/output/neuroadmixture/native_eastern_europe_b.fam")
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 OKI 1002 0 0 2 -9
## 3 OKI 1003 0 0 2 -9
## 4 OKI 1004 0 0 2 -9
## 5 OKI 1005 0 0 2 -9
## 6 OKI 1006 0 0 1 -9
Create column ID
# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"
# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"
# Select ID
fam_data <- fam_data |>
dplyr::select("ind", "pop")
# View the first few rows
head(fam_data)
## ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI
Add it to the matrix
## ind pop X1 X2 X3 X4 X5
## 1 1001 OKI 9.99910e-05 2.77788e-02 2.00482e-02 1.00739e-01 8.07674e-02
## 2 1002 OKI 3.60497e-02 5.85426e-03 2.80621e-02 1.05538e-01 3.50887e-02
## 3 1003 OKI 9.99460e-05 9.99460e-05 1.23075e-02 9.99460e-05 1.88513e-03
## 4 1004 OKI 1.67591e-02 2.10264e-02 9.99640e-05 9.29501e-04 9.99640e-05
## 5 1005 OKI 1.40587e-02 9.99640e-05 3.80564e-03 1.76831e-02 9.99640e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
## X6 X7 X8 X9 X10 X11
## 1 7.20425e-02 2.86226e-02 4.80016e-02 2.91051e-02 0.496439 9.63562e-02
## 2 1.09040e-01 2.11568e-02 8.84652e-02 6.42642e-02 0.350395 1.56085e-01
## 3 9.99460e-05 9.68483e-04 9.99460e-05 9.99460e-05 0.981302 2.93705e-03
## 4 7.12489e-02 9.99640e-05 9.99640e-05 1.28659e-02 0.871708 5.06209e-03
## 5 4.01411e-02 9.99640e-05 3.13253e-03 9.52133e-03 0.911258 9.99640e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 0.999001 9.99101e-05
Rename the columns
# Rename the columns starting from the third one
leak11 <- leak11 |>
rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))
# View the first few rows
head(leak11)
## ind pop v1 v2 v3 v4 v5
## 1 1001 OKI 9.99910e-05 2.77788e-02 2.00482e-02 1.00739e-01 8.07674e-02
## 2 1002 OKI 3.60497e-02 5.85426e-03 2.80621e-02 1.05538e-01 3.50887e-02
## 3 1003 OKI 9.99460e-05 9.99460e-05 1.23075e-02 9.99460e-05 1.88513e-03
## 4 1004 OKI 1.67591e-02 2.10264e-02 9.99640e-05 9.29501e-04 9.99640e-05
## 5 1005 OKI 1.40587e-02 9.99640e-05 3.80564e-03 1.76831e-02 9.99640e-05
## 6 1006 OKI 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05
## v6 v7 v8 v9 v10 v11
## 1 7.20425e-02 2.86226e-02 4.80016e-02 2.91051e-02 0.496439 9.63562e-02
## 2 1.09040e-01 2.11568e-02 8.84652e-02 6.42642e-02 0.350395 1.56085e-01
## 3 9.99460e-05 9.68483e-04 9.99460e-05 9.99460e-05 0.981302 2.93705e-03
## 4 7.12489e-02 9.99640e-05 9.99640e-05 1.28659e-02 0.871708 5.06209e-03
## 5 4.01411e-02 9.99640e-05 3.13253e-03 9.52133e-03 0.911258 9.99640e-05
## 6 9.99101e-05 9.99101e-05 9.99101e-05 9.99101e-05 0.999001 9.99101e-05
Import Sample Locations
sampling_loc <- readRDS(here("scripts", "RMarkdowns", "output", "sampling_loc_euro_global.rds"))
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
source(
here("scripts", "RMarkdowns",
"my_theme3.R"
)
)
# Melt the data frame for plotting
Q_melted <- leak11 |>
pivot_longer(
cols = -c(ind, pop),
names_to = "variable",
values_to = "value"
)
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
left_join(sampling_loc, by = c("pop" = "Abbreviation"))
# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
arrange(order, ind) |>
mutate(ind = factor(ind, levels = unique(ind))) # Convert ind to a factor with levels in the desired order
# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
group_by(Country) |>
mutate(label = ifelse(row_number() == 1, as.character(Country), NA))
# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
group_by(ind, variable) |>
summarise(value = mean(value), .groups = "drop")
# Create a data frame for borders
borders <-
data.frame(Country = unique(Q_ordered$Country))
# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
sapply(borders$Country, function(rc)
max(which(Q_ordered$Country == rc))) + 0.5 # Shift borders to the right edge of the bars
# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
filter(!is.na(label)) |>
distinct(label, .keep_all = TRUE)
# Create a custom label function
label_func <- function(x) {
labels <- rep("", length(x))
labels[x %in% label_df$ind] <- label_df$label
labels
}
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) + 0)
# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
group_by(pop) |>
slice_head(n = 1) |>
ungroup() |>
dplyr::select(ind, Pop_City, Country, Name) |>
mutate(pos = as.numeric(ind)) # calculate position of population labels
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Calculate the position of lines
border_positions <- Q_ordered |>
group_by(Country) |>
summarise(pos = max(as.numeric(ind)) - 1)
pop_labels_bars <- pop_labels |>
mutate(pos = as.numeric(ind) - .5)
# Function to filter and normalize data
normalize_data <- function(df, min_value) {
df |>
filter(value > min_value) |>
group_by(ind) |>
mutate(value = value / sum(value))
}
# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
#
# Generate all potential variable names
all_variables <- paste0("v", 1:11)
# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
color = color_palette_11[1:length(all_variables)])
# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")
# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=11.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_11) +
expand_limits(y = c(0, 1.5))
color_palette_11 <-
c(
"purple",
"orangered",
"purple4",
"#FFFF99",
"chocolate4",
"navy",
"green4",
"magenta",
"orange",
"#75FAFF",
"yellow2"
)
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
geom_bar(stat = 'identity', width = 1) +
geom_vline(
data = pop_labels_bars,
aes(xintercept = pos),
color = "#2C444A",
linewidth = .2
) +
geom_text(
data = pop_labels,
aes(x = as.numeric(ind), y = 1, label = Name),
vjust = 1.5,
hjust = 0,
size = 2,
angle = 90,
inherit.aes = FALSE
) +
my_theme() +
theme(
axis.text.x = element_text(
angle = 90,
hjust = 1,
size = 12
),
legend.position = "none",
plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
) +
xlab("Admixture matrix") +
ylab("Ancestry proportions") +
labs(caption = "Each bar represents the ancestry proportions for an individual for k=11.\n LEA inference for 22,642 (MAF 1%) SNPs.") +
scale_x_discrete(labels = label_func) +
scale_fill_manual(values = color_palette_11) +
expand_limits(y = c(0, 1.5))
ggsave(
here("scripts", "RMarkdowns", "output", "euro_global", "lea", "MAF_1", "eastern_europe", "lea_k=11_eastern_europe_all_MAF1.pdf"),
width = 12,
height = 6,
units = "in",
device = cairo_pdf)