Load Libraries
## Loading required package: pegas
## Loading required package: ape
##
## Attaching package: 'pegas'
## The following object is masked from 'package:ape':
##
## mst
## Registered S3 method overwritten by 'ade4':
## method from
## print.amova pegas
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.2.1 ✔ dplyr 1.1.4
## ✔ tidyr 1.3.1 ✔ stringr 1.5.1
## ✔ readr 2.1.5 ✔ forcats 0.5.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::where() masks ape::where()
## Loading required package: ade4
##
## Attaching package: 'ade4'
## The following object is masked from 'package:pegas':
##
## amova
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## here() starts at /gpfs/gibbs/pi/caccone/mkc54/albo/scripts/RMarkdowns
##
## Attaching package: 'flextable'
## The following object is masked from 'package:purrr':
##
## compose
## The following object is masked from 'package:ape':
##
## rotate
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(dplyr)
library(tidyr)
library(geosphere)
library(flextable)
library(officer)
library(dartR) #do not load until Mantel test
## Loading required package: dartR.data
## **** Welcome to dartR.data [Version 1.0.8 ] ****
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Registered S3 method overwritten by 'genetics':
## method from
## [.haplotype pegas
## **** Welcome to dartR [Version 2.9.7 ] ****
## Be aware that owing to CRAN requirements and compatibility reasons not all functions of the package may run after the basic installation, as some packages could still be missing. Hence for a most enjoyable experience we recommend to run the function
## gl.install.vanilla.dartR()
## This installs all missing and required packages for your version of dartR. In case something fails during installation please refer to this tutorial: https://github.com/green-striped-gecko/dartR/wiki/Installation-tutorial.
##
## For information how to cite dartR, please use:
## citation('dartR')
## Global verbosity is set to: 2
##
## **** Have fun using dartR! ****
##
## Attaching package: 'dartR'
## The following objects are masked from 'package:dartR.data':
##
## bandicoot.gl, possums.gl, testset.gl, testset.gs
Create list of populations
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
awk '{print $1}' output/snps_sets/r2_0.1.fam | sort | uniq -c | awk '{print $2, $1}' | awk '$2 >= 4 {print}' | awk '{print $1}' > output/fst/pops_4fst.txt;
head output/fst/pops_4fst.txt;
wc -l output/fst/pops_4fst.txt
## ALD
## ALU
## ALV
## ARM
## BAR
## BRE
## BUL
## CES
## CRO
## DES
## 40 output/fst/pops_4fst.txt
We have 40 populations with 4 or more mosquitoes.
First load plink
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.1 \
--keep-fam output/fst/pops_4fst.txt \
--recodeA \
--out output/fst/r2_0.1 \
--silent;
grep 'samples\|variants\|remaining' output/fst/r2_0.1.log
47484 variants loaded from .bim file. –keep-fam: 407 people remaining. Total genotyping rate in remaining samples is 0.97243. 47484 variants and 407 people pass filters and QC.
Look at https://rdrr.io/cran/StAMPP/man/stamppFst.html for details of Fst estimations
LD2 <-
read.PLINK(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/r2_0.1.raw"
),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
## Loading required package: parallel
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Length Class Mode
## 1 genlight S4
The command below would also work, but you can simplify it and put only the numbers: genome_equal_2 <- stamppFst(neutral, nboots=100, percent=95 + nclusters==10)
This chunk will take a couple minutes to run.
# run stampp. If you want to run with bootstraps and nclusters use the HPC. It will run out of memory on a 32Gb laptop
LD2_3 <- stamppFst(LD2_2, 1, 95, 1)
Save it
To load it
Now lets look at the object
## SOC SEV GES KER
## Min. :0.007013 Min. :0.03129 Min. :0.01664 Min. :0.03044
## 1st Qu.:0.092839 1st Qu.:0.11584 1st Qu.:0.10032 1st Qu.:0.07663
## Median :0.111269 Median :0.13359 Median :0.11908 Median :0.09316
## Mean :0.105796 Mean :0.13094 Mean :0.11590 Mean :0.09695
## 3rd Qu.:0.125958 3rd Qu.:0.15214 3rd Qu.:0.13293 3rd Qu.:0.10914
## Max. :0.224918 Max. :0.24959 Max. :0.23132 Max. :0.20467
## NA's :1 NA's :2 NA's :3 NA's :4
## KRA TIK RAR TRE
## Min. :0.01118 Min. :0.05273 Min. :0.05224 Min. :0.02291
## 1st Qu.:0.07272 1st Qu.:0.11849 1st Qu.:0.10744 1st Qu.:0.03536
## Median :0.08882 Median :0.13388 Median :0.12240 Median :0.05226
## Mean :0.09044 Mean :0.13637 Mean :0.12788 Mean :0.05576
## 3rd Qu.:0.10320 3rd Qu.:0.14994 3rd Qu.:0.13898 3rd Qu.:0.06262
## Max. :0.19549 Max. :0.24710 Max. :0.23435 Max. :0.15338
## NA's :5 NA's :6 NA's :7 NA's :8
## ALU STS SIC BRE
## Min. :0.06457 Min. :0.03029 Min. :0.04408 Min. :0.08652
## 1st Qu.:0.07981 1st Qu.:0.05243 1st Qu.:0.06313 1st Qu.:0.11993
## Median :0.09611 Median :0.07010 Median :0.08064 Median :0.13676
## Mean :0.10160 Mean :0.07388 Mean :0.08414 Mean :0.13995
## 3rd Qu.:0.10865 3rd Qu.:0.07797 3rd Qu.:0.09302 3rd Qu.:0.15482
## Max. :0.19928 Max. :0.17568 Max. :0.19265 Max. :0.24730
## NA's :9 NA's :10 NA's :11 NA's :12
## DES CES TIR IMP
## Min. :0.07658 Min. :0.1313 Min. :-0.001035 Min. :0.04641
## 1st Qu.:0.10645 1st Qu.:0.1513 1st Qu.: 0.062685 1st Qu.:0.05972
## Median :0.12447 Median :0.1740 Median : 0.073933 Median :0.08137
## Mean :0.12570 Mean :0.1738 Mean : 0.082503 Mean :0.08277
## 3rd Qu.:0.13847 3rd Qu.:0.1849 3rd Qu.: 0.100562 3rd Qu.:0.09006
## Max. :0.22575 Max. :0.2813 Max. : 0.221360 Max. :0.22977
## NA's :13 NA's :14 NA's :15 NA's :16
## ROM GRC BAR BUL
## Min. :0.006715 Min. :0.02337 Min. :0.04513 Min. :0.05459
## 1st Qu.:0.043622 1st Qu.:0.05981 1st Qu.:0.08039 1st Qu.:0.06945
## Median :0.065630 Median :0.07719 Median :0.09870 Median :0.08783
## Mean :0.066354 Mean :0.08120 Mean :0.10234 Mean :0.09073
## 3rd Qu.:0.079526 3rd Qu.:0.09579 3rd Qu.:0.12077 3rd Qu.:0.09910
## Max. :0.208063 Max. :0.19013 Max. :0.21865 Max. :0.19947
## NA's :17 NA's :18 NA's :19 NA's :20
## CRO GRA ITB MAL
## Min. :0.02801 Min. :0.04833 Min. :0.05117 Min. :0.01902
## 1st Qu.:0.05670 1st Qu.:0.05786 1st Qu.:0.06880 1st Qu.:0.03881
## Median :0.06688 Median :0.07433 Median :0.08339 Median :0.05639
## Mean :0.07571 Mean :0.07941 Mean :0.09148 Mean :0.05939
## 3rd Qu.:0.09042 3rd Qu.:0.09380 3rd Qu.:0.10154 3rd Qu.:0.07280
## Max. :0.17399 Max. :0.18378 Max. :0.22018 Max. :0.16306
## NA's :21 NA's :22 NA's :23 NA's :24
## SPM TUA TUH ALD
## Min. :0.006964 Min. :0.02906 Min. :0.03621 Min. :0.02222
## 1st Qu.:0.045323 1st Qu.:0.05758 1st Qu.:0.04658 1st Qu.:0.08246
## Median :0.056999 Median :0.07472 Median :0.07008 Median :0.09953
## Mean :0.067238 Mean :0.08082 Mean :0.07368 Mean :0.10658
## 3rd Qu.:0.082719 3rd Qu.:0.09884 3rd Qu.:0.08475 3rd Qu.:0.12488
## Max. :0.211535 Max. :0.20012 Max. :0.17513 Max. :0.21635
## NA's :25 NA's :26 NA's :27 NA's :28
## FRS ITP POP ROS
## Min. :0.03816 Min. :0.02283 Min. :0.03474 Min. :0.05727
## 1st Qu.:0.04250 1st Qu.:0.04174 1st Qu.:0.05124 1st Qu.:0.08888
## Median :0.06173 Median :0.06601 Median :0.05630 Median :0.09642
## Mean :0.06881 Mean :0.07004 Mean :0.06996 Mean :0.10679
## 3rd Qu.:0.07313 3rd Qu.:0.07749 3rd Qu.:0.07207 3rd Qu.:0.10830
## Max. :0.16475 Max. :0.17488 Max. :0.16561 Max. :0.20116
## NA's :29 NA's :30 NA's :31 NA's :32
## SER SLO SPC SPB
## Min. :0.1539 Min. :0.04114 Min. :0.00677 Min. :0.05962
## 1st Qu.:0.1757 1st Qu.:0.05356 1st Qu.:0.06874 1st Qu.:0.07442
## Median :0.1885 Median :0.05762 Median :0.08788 Median :0.08698
## Mean :0.1932 Mean :0.06054 Mean :0.08103 Mean :0.08964
## 3rd Qu.:0.2100 3rd Qu.:0.06235 3rd Qu.:0.10608 3rd Qu.:0.10220
## Max. :0.2387 Max. :0.09073 Max. :0.13571 Max. :0.12500
## NA's :33 NA's :34 NA's :35 NA's :36
## SPS ARM ALV ITR
## Min. :0.07023 Min. :0.06867 Min. :0.05863 Min. : NA
## 1st Qu.:0.09195 1st Qu.:0.07298 1st Qu.:0.05863 1st Qu.: NA
## Median :0.11366 Median :0.07730 Median :0.05863 Median : NA
## Mean :0.09958 Mean :0.07730 Mean :0.05863 Mean :NaN
## 3rd Qu.:0.11425 3rd Qu.:0.08161 3rd Qu.:0.05863 3rd Qu.: NA
## Max. :0.11484 Max. :0.08593 Max. :0.05863 Max. : NA
## NA's :37 NA's :38 NA's :39 NA's :40
If you want you can save the fst values as csv.
# Save it
write.csv(LD2_df, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/LD2_df.csv"))
Check the Fst values
## SOC SEV GES KER KRA TIK RAR TRE ALU STS
## SOC NA NA NA NA NA NA NA NA NA NA
## SEV 0.045552200 NA NA NA NA NA NA NA NA NA
## GES 0.011806713 0.05534614 NA NA NA NA NA NA NA NA
## KER 0.045624567 0.06766476 0.05379764 NA NA NA NA NA NA NA
## KRA 0.007012644 0.03129251 0.01664437 0.03044281 NA NA NA NA NA NA
## TIK 0.057181751 0.07990133 0.05211205 0.07167703 0.03563554 NA NA NA NA NA
## SIC BRE DES CES TIR IMP ROM GRC BAR BUL CRO GRA ITB MAL SPM TUA TUH ALD FRS
## SOC NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## SEV NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## KER NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## KRA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## TIK NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## ITP POP ROS SER SLO SPC SPB SPS ARM ALV ITR
## SOC NA NA NA NA NA NA NA NA NA NA NA
## SEV NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA
## KER NA NA NA NA NA NA NA NA NA NA NA
## KRA NA NA NA NA NA NA NA NA NA NA NA
## TIK NA NA NA NA NA NA NA NA NA NA NA
Convert the data into a matrix.
## SOC SEV GES KER KRA TIK
## SOC NA 0.04555220 0.01180671 0.04562457 0.007012644 0.05718175
## SEV 0.045552200 NA 0.05534614 0.06766476 0.031292513 0.07990133
## GES 0.011806713 0.05534614 NA 0.05379764 0.016644374 0.05211205
## KER 0.045624567 0.06766476 0.05379764 NA 0.030442814 0.07167703
## KRA 0.007012644 0.03129251 0.01664437 0.03044281 NA 0.03563554
## TIK 0.057181751 0.07990133 0.05211205 0.07167703 0.035635540 NA
## RAR TRE ALU STS SIC BRE DES
## SOC 0.02146036 0.09313894 0.06849806 0.10571972 0.11554876 0.1739014 0.1604400
## SEV 0.05349749 0.11338435 0.09532910 0.12661620 0.13880220 0.1942978 0.1809121
## GES 0.02883972 0.09789464 0.08423622 0.10896011 0.12121523 0.1777287 0.1653767
## KER 0.05216969 0.07079005 0.06547083 0.08191323 0.09665681 0.1507775 0.1370573
## KRA 0.01117963 0.06857930 0.05392869 0.08213222 0.09068031 0.1479356 0.1356648
## TIK 0.05273252 0.11033213 0.08909356 0.12253683 0.13577321 0.1902386 0.1766860
## CES TIR IMP ROM GRC BAR BUL
## SOC 0.2035848 0.12595232 0.12040973 0.10473003 0.11390767 0.1342407 0.1259630
## SEV 0.2237784 0.15228764 0.14817276 0.13069532 0.13271938 0.1566361 0.1478450
## GES 0.2077032 0.13293266 0.12751597 0.10999453 0.11954040 0.1399889 0.1311498
## KER 0.1781829 0.10983965 0.09348153 0.08718079 0.09922120 0.1219008 0.1092024
## KRA 0.1783668 0.09963636 0.09342591 0.07912006 0.08852899 0.1098279 0.1018435
## TIK 0.2206370 0.15084214 0.14563127 0.12846942 0.13162156 0.1512489 0.1460110
## CRO GRA ITB MAL SPM TUA TUH
## SOC 0.11285209 0.10389647 0.1321288 0.09253993 0.11126944 0.11525321 0.10366569
## SEV 0.13446449 0.12468818 0.1565682 0.11566729 0.13877342 0.13632772 0.12331465
## GES 0.11881055 0.10942797 0.1379516 0.09895866 0.11908360 0.12052592 0.10931793
## KER 0.09867196 0.09173843 0.1091228 0.07477024 0.08913167 0.10089836 0.08500275
## KRA 0.09054381 0.08042505 0.1055944 0.07104441 0.08321229 0.08881825 0.07810616
## TIK 0.13095324 0.12225653 0.1554350 0.11274247 0.13430953 0.13345426 0.12082138
## ALD FRS ITP POP ROS SER SLO
## SOC 0.1380274 0.10079113 0.09753798 0.09704470 0.1306794 0.2249183 0.08859806
## SEV 0.1589880 0.12036408 0.12098394 0.11863814 0.1517054 0.2495944 0.10848958
## GES 0.1437654 0.10515731 0.10354928 0.10227705 0.1357736 0.2313209 0.09341733
## KER 0.1242578 0.07724840 0.07881705 0.07343890 0.1090094 0.2046692 0.07183871
## KRA 0.1134517 0.07524388 0.07279459 0.07265533 0.1045599 0.1954888 0.06240033
## TIK 0.1572443 0.11835137 0.11890137 0.11648063 0.1472208 0.2471006 0.10528389
## SPC SPB SPS ARM ALV ITR
## SOC 0.12461422 0.11407133 0.1477816 0.04422243 0.11593610 0.09554091
## SEV 0.15272739 0.13981094 0.1702386 0.07107957 0.13792904 0.11634052
## GES 0.13137300 0.11978821 0.1545255 0.05484410 0.12252625 0.10032352
## KER 0.10189184 0.09283876 0.1304073 0.04004856 0.10166083 0.07891624
## KRA 0.09834158 0.08996602 0.1225046 0.01855860 0.09369755 0.07150888
## TIK 0.14579627 0.13577266 0.1681516 0.06652363 0.13457140 0.11344720
Import sample locations
sampling_loc <- readRDS(here("output", "sampling_loc_euro_global.rds"))
# Arrange by region
sampling_loc <- sampling_loc |>
dplyr::arrange(
order
)
# Check it
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Order
## [1] "BER" "COL" "PAL" "HOU" "LOS" "MAU" "REC" "GRV" "FRS" "STS" "POP" "POL"
## [13] "SPB" "SPS" "SPC" "BAR" "SPM" "IMP" "ITG" "BRE" "DES" "TRE" "ITB" "CES"
## [25] "ROM" "ITR" "SIC" "ITP" "MAL" "SLO" "CRO" "ALV" "ALD" "TIR" "SER" "GRA"
## [37] "GRC" "ROS" "BUL" "TUA" "TUH" "SEV" "ALU" "KER" "KRA" "SOC" "TIK" "RAR"
## [49] "GES" "ARM" "KAN" "UTS" "KAG" "OKI" "HAI" "YUN" "HUN" "TAI" "GEL" "BEN"
## [61] "KUN" "KAT" "JAF" "CAM" "SUF" "SUU" "INW" "INJ" "KLP" "MAT" "SSK" "KAC"
## [73] "SON" "CHA" "LAM" "HAN" "HOC" "QNC" "LIB" "MAD" "TRO" "DAU" "JAT" "YAT"
## [85] "GAB" "ANT" "DGV" "VOH" "RAB" "YAO" "AWK" "BRM" "JAM" "SAI" "BEA" "CHI"
## [97] "DAL" "FAY" "MAC" "MAN" "NEO" "NEW" "NUE" "PEO" "RUS" "SPR" "POR" "NOV"
## [109] "TUC" "MED" "AIZ" "HIR" "KHO" "KYO" "NAG" "NIG" "SAG" "SAK" "SEN" "TAN"
LD2_df_ordered <- read_csv(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/LD2_df_odered.csv"
)
## New names:
## Rows: 40 Columns: 41
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (40): FRS, STS, POP, SPB, SPS, SPC, BAR, SPM, IMP, BRE, DES, TRE,
## ITB, C...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Below does not change order Create vector with order of populations
# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df_ordered)
# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]
#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it
# Print the reordered populations
print(poporder)
## [1] "FRS" "STS" "POP" "SPB" "SPS" "SPC" "BAR" "SPM" "IMP" "BRE" "DES" "TRE"
## [13] "ITB" "CES" "ROM" "ITR" "SIC" "ITP" "MAL" "SLO" "CRO" "ALV" "ALD" "TIR"
## [25] "SER" "GRA" "GRC" "ROS" "BUL" "TUA" "TUH" "SEV" "ALU" "KER" "KRA" "SOC"
## [37] "TIK" "RAR" "GES" "ARM"
Lets check if the matrix is symmetric.
## [1] TRUE
Order the matrix using poporder. We will also add NA on the upper left side of the matrix.
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## FRS : 40 FRS : 40 Min. :-0.0010
## STS : 40 STS : 40 1st Qu.: 0.0665
## POP : 40 POP : 40 Median : 0.0931
## SPB : 40 SPB : 40 Mean : 0.0991
## SPS : 40 SPS : 40 3rd Qu.: 0.1243
## SPC : 40 SPC : 40 Max. : 0.2813
## (Other):1360 (Other):1360 NA's :820
Now lets plot the data with ggplot. You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f <- ggplot(pairfst.long, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 2) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 0)
)
pairfst.f
Save it
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
# Step 2: Calculate mean Fst for each pair of countries
# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(LD2_df))
fst_df$Abbreviation1 <- rownames(fst_df)
# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)
# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))
# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>%
group_by(Country_1, Country_2) %>%
summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>%
filter(Country_1 != Country_2)
#save the fst values as csv
write.csv(fst_summary, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/fst_summary1.csv"))
fst_summary_ordered <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/fst_summary1_ordered.txt")
#fst_summary_ordered <- data.frame(fst_summary_ordered)
fst_summary_ordered <- read.table(fst_summary_ordered,
header=TRUE)
# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary_ordered, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)
# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)
for(i in 1:nrow(fst_matrix_summary)) {
for(j in i:nrow(fst_matrix_summary)) {
if (i == j) {
symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
} else {
avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
symmetric_fst_matrix[i, j] <- avg_value
symmetric_fst_matrix[j, i] <- avg_value
}
}
}
# Check if the matrix is symmetric
# print(isSymmetric(symmetric_fst_matrix))
# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
## Albania Armenia Bulgaria Croatia France Georgia
## Albania NA 0.09231399 0.10160985 0.02884265 0.08188077 0.13307477
## Armenia 0.09231399 NA 0.09402725 0.08299684 0.06455227 0.05484410
## Bulgaria 0.10160985 0.09402725 NA 0.08739974 0.07533960 0.13114976
## Croatia 0.02884265 0.08299684 0.08739974 NA 0.06454581 0.11881055
## France 0.08188077 0.06455227 0.07533960 0.06454581 NA 0.10705871
## Georgia 0.13307477 0.05484410 0.13114976 0.11881055 0.10705871 NA
## Greece 0.06106728 0.07997747 0.09085677 0.05464731 0.07011300 0.11448419
## Italy 0.09996591 0.08046541 0.09077892 0.08415611 0.07080554 0.13492534
## Malta 0.07007296 0.06246256 0.05837692 0.05617341 0.03917767 0.09895866
## Portugal 0.07553921 0.05447484 0.06655260 0.06430580 0.04306543 0.10227705
## Romania 0.10978487 0.09224141 0.10230543 0.09812481 0.07432071 0.13577364
## Russia 0.12792279 0.04538750 0.12692205 0.11359257 0.10317288 0.02216938
## Serbia 0.20158615 0.18849691 0.19946824 0.17399103 0.17021355 0.23132092
## Slovenia 0.06627041 0.05313194 0.06563008 0.05813764 0.04427634 0.09341733
## Spain 0.10947480 0.08133491 0.10628290 0.09472900 0.07205435 0.13295185
## Turkey 0.08085839 0.07780083 0.07721910 0.07165525 0.06093551 0.11492193
## Ukraine 0.12214703 0.05856725 0.12052216 0.10875348 0.09514142 0.06218153
## Greece Italy Malta Portugal Romania Russia
## Albania 0.06106728 0.09996591 0.07007296 0.07553921 0.10978487 0.12792279
## Armenia 0.07997747 0.08046541 0.06246256 0.05447484 0.09224141 0.04538750
## Bulgaria 0.09085677 0.09077892 0.05837692 0.06655260 0.10230543 0.12692205
## Croatia 0.05464731 0.08415611 0.05617341 0.06430580 0.09812481 0.11359257
## France 0.07011300 0.07080554 0.03917767 0.04306543 0.07432071 0.10317288
## Georgia 0.11448419 0.13492534 0.09895866 0.10227705 0.13577364 0.02216938
## Greece NA 0.08772003 0.05425446 0.06662748 0.09981326 0.10892420
## Italy 0.08772003 NA 0.04668323 0.04895747 0.09078298 0.13062252
## Malta 0.05425446 0.04668323 NA 0.03008816 0.07279516 0.09457124
## Portugal 0.06662748 0.04895747 0.03008816 NA 0.07206916 0.09820792
## Romania 0.09981326 0.09078298 0.07279516 0.07206916 NA 0.13035972
## Russia 0.10892420 0.13062252 0.09457124 0.09820792 0.13035972 NA
## Serbia 0.18695824 0.19097858 0.16306302 0.16561275 0.20116330 0.22546408
## Slovenia 0.05637978 0.05224372 0.03900747 0.03651465 0.05726610 0.08821314
## Spain 0.09375979 0.08986473 0.07238616 0.06471743 0.10652495 0.12772002
## Turkey 0.07160055 0.07018211 0.04627632 0.05506548 0.09143886 0.10920612
## Ukraine 0.10325455 0.09842950 0.08717645 0.09011392 0.12408803 0.05788560
## Serbia Slovenia Spain Turkey Ukraine
## Albania 0.2015861 0.06627041 0.10947480 0.08085839 0.12214703
## Armenia 0.1884969 0.05313194 0.08133491 0.07780083 0.05856725
## Bulgaria 0.1994682 0.06563008 0.10628290 0.07721910 0.12052216
## Croatia 0.1739910 0.05813764 0.09472900 0.07165525 0.10875348
## France 0.1702136 0.04427634 0.07205435 0.06093551 0.09514142
## Georgia 0.2313209 0.09341733 0.13295185 0.11492193 0.06218153
## Greece 0.1869582 0.05637978 0.09375979 0.07160055 0.10325455
## Italy 0.1909786 0.05224372 0.08986473 0.07018211 0.09842950
## Malta 0.1630630 0.03900747 0.07238616 0.04627632 0.08717645
## Portugal 0.1656127 0.03651465 0.06471743 0.05506548 0.09011392
## Romania 0.2011633 0.05726610 0.10652495 0.09143886 0.12408803
## Russia 0.2254641 0.08821314 0.12772002 0.10920612 0.05788560
## Serbia NA 0.15393428 0.21730466 0.18762268 0.21784855
## Slovenia 0.1539343 NA 0.06472219 0.05217992 0.08222846
## Spain 0.2173047 0.06472219 NA 0.07187648 0.12136587
## Turkey 0.1876227 0.05217992 0.07187648 NA 0.10307601
## Ukraine 0.2178485 0.08222846 0.12136587 0.10307601 NA
# Read the file
country_order <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/country_order.txt")
country_order <- read.table(country_order,
header = FALSE,
col.names = c("country"))
order_countries <- as.vector(country_order$country)
order_countries
## [1] "France" "Portugal" "Spain" "Italy" "Malta" "Slovenia"
## [7] "Croatia" "Albania" "Serbia" "Greece" "Romania" "Bulgaria"
## [13] "Turkey" "Ukraine" "Russia" "Georgia" "Armenia"
write.csv(symmetric_fst_matrix, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/symmetric_fst_matrix.csv"))
symmetric_fst_matrix_ordered <- read_csv(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/symmetric_fst_matrix_ordered.csv"
)
## New names:
## Rows: 17 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (17): France, Portugal, Spain, Italy, Malta, Slovenia, Croatia,
## Albania,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
symmetric_fst_matrix_ordered<- as.data.frame(symmetric_fst_matrix_ordered) #convert to dataframe
rownames(symmetric_fst_matrix_ordered) <- symmetric_fst_matrix_ordered$...1 #use first column as rownames
symmetric_fst_matrix_ordered <- symmetric_fst_matrix_ordered[ -c(1) ] #remove 1st column
symmetric_fst_matrix_ordered <-as.matrix(symmetric_fst_matrix_ordered) #convert to matrix
symmetric_fst_matrix_ordered[lower.tri(symmetric_fst_matrix_ordered)] <- NA
print(symmetric_fst_matrix_ordered)
## France Portugal Spain Italy Malta Slovenia
## France NA 0.04306543 0.07205435 0.07080554 0.03917767 0.04427634
## Portugal NA NA 0.06471743 0.04895747 0.03008816 0.03651465
## Spain NA NA NA 0.08986473 0.07238616 0.06472219
## Italy NA NA NA NA 0.04668323 0.05224372
## Malta NA NA NA NA NA 0.03900747
## Slovenia NA NA NA NA NA NA
## Croatia NA NA NA NA NA NA
## Albania NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA
## Greece NA NA NA NA NA NA
## Romania NA NA NA NA NA NA
## Bulgaria NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Ukraine NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Georgia NA NA NA NA NA NA
## Armenia NA NA NA NA NA NA
## Croatia Albania Serbia Greece Romania Bulgaria
## France 0.06454582 0.08188078 0.1702136 0.07011300 0.07432071 0.07533960
## Portugal 0.06430580 0.07553921 0.1656127 0.06662748 0.07206916 0.06655260
## Spain 0.09472900 0.10947480 0.2173047 0.09375979 0.10652494 0.10628290
## Italy 0.08415611 0.09996591 0.1909786 0.08772003 0.09078298 0.09077892
## Malta 0.05617341 0.07007296 0.1630630 0.05425446 0.07279516 0.05837692
## Slovenia 0.05813764 0.06627041 0.1539343 0.05637978 0.05726610 0.06563008
## Croatia NA 0.02884265 0.1739910 0.05464731 0.09812481 0.08739974
## Albania NA NA 0.2015861 0.06106728 0.10978487 0.10160985
## Serbia NA NA NA 0.18695824 0.20116330 0.19946824
## Greece NA NA NA NA 0.09981326 0.09085677
## Romania NA NA NA NA NA 0.10230543
## Bulgaria NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Ukraine NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Georgia NA NA NA NA NA NA
## Armenia NA NA NA NA NA NA
## Turkey Ukraine Russia Georgia Armenia
## France 0.06093551 0.09514142 0.10317288 0.10705871 0.06455227
## Portugal 0.05506548 0.09011392 0.09820792 0.10227705 0.05447484
## Spain 0.07187648 0.12136587 0.12772002 0.13295185 0.08133491
## Italy 0.07018211 0.09842950 0.13062252 0.13492534 0.08046541
## Malta 0.04627632 0.08717645 0.09457124 0.09895866 0.06246256
## Slovenia 0.05217992 0.08222846 0.08821314 0.09341733 0.05313194
## Croatia 0.07165525 0.10875348 0.11359257 0.11881055 0.08299684
## Albania 0.08085839 0.12214703 0.12792279 0.13307477 0.09231399
## Serbia 0.18762268 0.21784855 0.22546408 0.23132092 0.18849691
## Greece 0.07160055 0.10325455 0.10892420 0.11448419 0.07997747
## Romania 0.09143886 0.12408803 0.13035972 0.13577364 0.09224141
## Bulgaria 0.07721910 0.12052216 0.12692205 0.13114976 0.09402725
## Turkey NA 0.10307601 0.10920612 0.11492193 0.07780083
## Ukraine NA NA 0.05788560 0.06218153 0.05856725
## Russia NA NA NA 0.02216938 0.04538750
## Georgia NA NA NA NA 0.05484410
## Armenia NA NA NA NA NA
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## France : 17 France : 17 Min. :0.02217
## Portugal: 17 Portugal: 17 1st Qu.:0.06468
## Spain : 17 Spain : 17 Median :0.08904
## Italy : 17 Italy : 17 Mean :0.09569
## Malta : 17 Malta : 17 3rd Qu.:0.11074
## Slovenia: 17 Slovenia: 17 Max. :0.23132
## (Other) :187 (Other) :187 NA's :153
You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f2 <- ggplot(pairfst.long2, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 3) +
theme(
axis.text.x = element_text(angle = 90, hjust = 0),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 1)
)
pairfst.f2
ggsave(
filename = here("output", "europe", "fst", "fst_matrix_Europe_LD2_by_country_ordered.pdf"),
pairfst.f2,
width = 6,
height = 5,
units = "in"
)
Remove NAs and rename columns
# remove NAs
fst2 <-
pairfst.long |>
drop_na()
# rename columns
fst2 <-
fst2 |>
dplyr::rename(pop1 = 1,
pop2 = 2,
fst = 3)
# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
dplyr::select(pop = pop2, fst)
# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)
# Calculate the mean fst for each population
mean_fst <- df_combined |>
group_by(pop) |>
summarise(mean_fst = mean(fst))
print(mean_fst)
## # A tibble: 40 × 2
## pop mean_fst
## <fct> <dbl>
## 1 FRS 0.0746
## 2 STS 0.0789
## 3 POP 0.0704
## 4 SPB 0.0897
## 5 SPS 0.120
## 6 SPC 0.100
## 7 BAR 0.114
## 8 SPM 0.0810
## 9 IMP 0.0934
## 10 BRE 0.144
## # ℹ 30 more rows
Merge
fst3 <-
sampling_loc |>
left_join(
mean_fst,
by = c("Abbreviation" = "pop")
) |>
drop_na() |>
dplyr::select(
-Region
)
head(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## Subregion order order2 orderold mean_fst
## 1 West Europe 9 1 1 0.07462900
## 2 West Europe 10 2 2 0.07891003
## 3 West Europe 11 3 3 0.07039614
## 4 West Europe 13 5 5 0.08966956
## 5 West Europe 14 6 6 0.11997670
## 6 West Europe 15 7 7 0.10001777
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Subregion) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Subregion")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order order2 orderold mean_fst mean_fst_by_region
## 1 West Europe 9 1 1 0.07462900 0.1
## 2 West Europe 10 2 2 0.07891003 0.1
## 3 West Europe 11 3 3 0.07039614 0.1
## 4 West Europe 13 5 5 0.08966956 0.1
## 5 West Europe 14 6 6 0.11997670 0.1
## 6 West Europe 15 7 7 0.10001777 0.1
## 7 West Europe 16 8 8 0.11361455 0.1
## 8 West Europe 17 9 9 0.08101992 0.1
## 9 West Europe 18 10 10 0.09338559 0.1
## 10 West Europe 20 12 12 0.14417917 0.1
## 11 West Europe 21 13 13 0.12864170 0.1
## 12 West Europe 22 14 14 0.06256140 0.1
## 13 West Europe 23 15 15 0.10181636 0.1
## 14 West Europe 24 16 16 0.17560017 0.1
## 15 West Europe 25 17 17 0.07923011 0.1
## 16 West Europe 26 18 18 0.07069860 0.1
## 17 West Europe 27 19 19 0.08872802 0.1
## 18 West Europe 28 20 20 0.07094679 0.1
## 19 West Europe 29 21 21 0.06875044 0.1
## 20 East Europe 30 22 22 0.06683635 0.1
## 21 East Europe 31 23 23 0.08693351 0.1
## 22 East Europe 32 24 24 0.09045198 0.1
## 23 East Europe 33 25 25 0.11233441 0.1
## 24 East Europe 34 26 26 0.09810811 0.1
## 25 East Europe 35 27 27 0.20341532 0.1
## 26 East Europe 36 28 28 0.08644638 0.1
## 27 East Europe 37 29 29 0.09177586 0.1
## 28 East Europe 38 30 30 0.10597770 0.1
## 29 East Europe 39 31 31 0.10187716 0.1
## 30 East Europe 40 32 32 0.09142387 0.1
## 31 East Europe 41 33 33 0.07873389 0.1
## 32 East Europe 42 34 34 0.12875447 0.1
## 33 East Europe 43 35 35 0.09618709 0.1
## 34 East Europe 44 36 36 0.09378027 0.1
## 35 East Europe 45 37 37 0.08335369 0.1
## 36 East Europe 46 38 38 0.10579596 0.1
## 37 East Europe 47 39 39 0.12649180 0.1
## 38 East Europe 48 40 40 0.11384850 0.1
## 39 East Europe 49 41 41 0.11167952 0.1
## 40 East Europe 50 42 42 0.07881133 0.1
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order order2 orderold mean_fst mean_fst_by_region
## 1 West Europe 9 1 1 0.07462900 0.1
## 2 West Europe 10 2 2 0.07891003 0.1
## 3 West Europe 11 3 3 0.07039614 0.1
## 4 West Europe 13 5 5 0.08966956 0.1
## 5 West Europe 14 6 6 0.11997670 0.1
## 6 West Europe 15 7 7 0.10001777 0.1
## 7 West Europe 16 8 8 0.11361455 0.1
## 8 West Europe 17 9 9 0.08101992 0.1
## 9 West Europe 18 10 10 0.09338559 0.1
## 10 West Europe 20 12 12 0.14417917 0.1
## 11 West Europe 21 13 13 0.12864170 0.1
## 12 West Europe 22 14 14 0.06256140 0.1
## 13 West Europe 23 15 15 0.10181636 0.1
## 14 West Europe 24 16 16 0.17560017 0.1
## 15 West Europe 25 17 17 0.07923011 0.1
## 16 West Europe 26 18 18 0.07069860 0.1
## 17 West Europe 27 19 19 0.08872802 0.1
## 18 West Europe 28 20 20 0.07094679 0.1
## 19 West Europe 29 21 21 0.06875044 0.1
## 20 East Europe 30 22 22 0.06683635 0.1
## 21 East Europe 31 23 23 0.08693351 0.1
## 22 East Europe 32 24 24 0.09045198 0.1
## 23 East Europe 33 25 25 0.11233441 0.1
## 24 East Europe 34 26 26 0.09810811 0.1
## 25 East Europe 35 27 27 0.20341532 0.1
## 26 East Europe 36 28 28 0.08644638 0.1
## 27 East Europe 37 29 29 0.09177586 0.1
## 28 East Europe 38 30 30 0.10597770 0.1
## 29 East Europe 39 31 31 0.10187716 0.1
## 30 East Europe 40 32 32 0.09142387 0.1
## 31 East Europe 41 33 33 0.07873389 0.1
## 32 East Europe 42 34 34 0.12875447 0.1
## 33 East Europe 43 35 35 0.09618709 0.1
## 34 East Europe 44 36 36 0.09378027 0.1
## 35 East Europe 45 37 37 0.08335369 0.1
## 36 East Europe 46 38 38 0.10579596 0.1
## 37 East Europe 47 39 39 0.12649180 0.1
## 38 East Europe 48 40 40 0.11384850 0.1
## 39 East Europe 49 41 41 0.11167952 0.1
## 40 East Europe 50 42 42 0.07881133 0.1
## mean_fst_by_country
## 1 0.08
## 2 0.08
## 3 0.07
## 4 0.10
## 5 0.10
## 6 0.10
## 7 0.10
## 8 0.10
## 9 0.10
## 10 0.10
## 11 0.10
## 12 0.10
## 13 0.10
## 14 0.10
## 15 0.10
## 16 0.10
## 17 0.10
## 18 0.10
## 19 0.07
## 20 0.07
## 21 0.09
## 22 0.10
## 23 0.10
## 24 0.10
## 25 0.20
## 26 0.09
## 27 0.09
## 28 0.11
## 29 0.10
## 30 0.09
## 31 0.09
## 32 0.11
## 33 0.11
## 34 0.11
## 35 0.11
## 36 0.11
## 37 0.11
## 38 0.11
## 39 0.11
## 40 0.08
Mean by latitude
# Add a new column to indicate whether the latitude is above or below 30N
fst3 <- fst3 |>
mutate(Latitude_group = ifelse(Latitude >= 40, "Above 40N", "Below 40N"))
# Summarize the data by Latitude_group and calculate the mean_fst
summary_by_latitude <- fst3 |>
group_by(Latitude_group) |>
summarize(mean_fst_by_latitude = mean(mean_fst, na.rm = TRUE)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_latitude column to the fst3 tibble
fst3 <- fst3 |>
left_join(summary_by_latitude, by = "Latitude_group")
# Rename columns
fst3 <- fst3 |>
dplyr::rename(
City = Pop_City)
# Print the modified fst3 tibble
print(fst3)
## City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order order2 orderold mean_fst mean_fst_by_region
## 1 West Europe 9 1 1 0.07462900 0.1
## 2 West Europe 10 2 2 0.07891003 0.1
## 3 West Europe 11 3 3 0.07039614 0.1
## 4 West Europe 13 5 5 0.08966956 0.1
## 5 West Europe 14 6 6 0.11997670 0.1
## 6 West Europe 15 7 7 0.10001777 0.1
## 7 West Europe 16 8 8 0.11361455 0.1
## 8 West Europe 17 9 9 0.08101992 0.1
## 9 West Europe 18 10 10 0.09338559 0.1
## 10 West Europe 20 12 12 0.14417917 0.1
## 11 West Europe 21 13 13 0.12864170 0.1
## 12 West Europe 22 14 14 0.06256140 0.1
## 13 West Europe 23 15 15 0.10181636 0.1
## 14 West Europe 24 16 16 0.17560017 0.1
## 15 West Europe 25 17 17 0.07923011 0.1
## 16 West Europe 26 18 18 0.07069860 0.1
## 17 West Europe 27 19 19 0.08872802 0.1
## 18 West Europe 28 20 20 0.07094679 0.1
## 19 West Europe 29 21 21 0.06875044 0.1
## 20 East Europe 30 22 22 0.06683635 0.1
## 21 East Europe 31 23 23 0.08693351 0.1
## 22 East Europe 32 24 24 0.09045198 0.1
## 23 East Europe 33 25 25 0.11233441 0.1
## 24 East Europe 34 26 26 0.09810811 0.1
## 25 East Europe 35 27 27 0.20341532 0.1
## 26 East Europe 36 28 28 0.08644638 0.1
## 27 East Europe 37 29 29 0.09177586 0.1
## 28 East Europe 38 30 30 0.10597770 0.1
## 29 East Europe 39 31 31 0.10187716 0.1
## 30 East Europe 40 32 32 0.09142387 0.1
## 31 East Europe 41 33 33 0.07873389 0.1
## 32 East Europe 42 34 34 0.12875447 0.1
## 33 East Europe 43 35 35 0.09618709 0.1
## 34 East Europe 44 36 36 0.09378027 0.1
## 35 East Europe 45 37 37 0.08335369 0.1
## 36 East Europe 46 38 38 0.10579596 0.1
## 37 East Europe 47 39 39 0.12649180 0.1
## 38 East Europe 48 40 40 0.11384850 0.1
## 39 East Europe 49 41 41 0.11167952 0.1
## 40 East Europe 50 42 42 0.07881133 0.1
## mean_fst_by_country Latitude_group mean_fst_by_latitude
## 1 0.08 Above 40N 0.10154776
## 2 0.08 Above 40N 0.10154776
## 3 0.07 Above 40N 0.10154776
## 4 0.10 Below 40N 0.09086761
## 5 0.10 Below 40N 0.09086761
## 6 0.10 Below 40N 0.09086761
## 7 0.10 Above 40N 0.10154776
## 8 0.10 Below 40N 0.09086761
## 9 0.10 Above 40N 0.10154776
## 10 0.10 Above 40N 0.10154776
## 11 0.10 Above 40N 0.10154776
## 12 0.10 Above 40N 0.10154776
## 13 0.10 Above 40N 0.10154776
## 14 0.10 Above 40N 0.10154776
## 15 0.10 Above 40N 0.10154776
## 16 0.10 Above 40N 0.10154776
## 17 0.10 Below 40N 0.09086761
## 18 0.10 Above 40N 0.10154776
## 19 0.07 Below 40N 0.09086761
## 20 0.07 Above 40N 0.10154776
## 21 0.09 Above 40N 0.10154776
## 22 0.10 Above 40N 0.10154776
## 23 0.10 Above 40N 0.10154776
## 24 0.10 Above 40N 0.10154776
## 25 0.20 Above 40N 0.10154776
## 26 0.09 Below 40N 0.09086761
## 27 0.09 Below 40N 0.09086761
## 28 0.11 Above 40N 0.10154776
## 29 0.10 Above 40N 0.10154776
## 30 0.09 Below 40N 0.09086761
## 31 0.09 Above 40N 0.10154776
## 32 0.11 Above 40N 0.10154776
## 33 0.11 Above 40N 0.10154776
## 34 0.11 Above 40N 0.10154776
## 35 0.11 Above 40N 0.10154776
## 36 0.11 Above 40N 0.10154776
## 37 0.11 Above 40N 0.10154776
## 38 0.11 Above 40N 0.10154776
## 39 0.11 Above 40N 0.10154776
## 40 0.08 Above 40N 0.10154776
fst4 <- fst3 |>
dplyr::select(
Latitude_group, mean_fst_by_latitude, Subregion, mean_fst_by_region, Country, mean_fst_by_country, City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Latitude_group, Subregion, Country, City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## Latitude_group mean_fst_by_latitude Subregion mean_fst_by_region Country
## 1 Above 40N 0.1 East Europe 0.1 Albania
## 2 Above 40N 0.1 East Europe 0.1 Albania
## 3 Above 40N 0.1 East Europe 0.1 Albania
## 4 Above 40N 0.1 East Europe 0.1 Armenia
## 5 Above 40N 0.1 East Europe 0.1 Bulgaria
## 6 Above 40N 0.1 East Europe 0.1 Croatia
## mean_fst_by_country City Abbreviation mean_fst
## 1 0.10 Durres ALD 0.11
## 2 0.10 Tirana TIR 0.10
## 3 0.10 Vlore ALV 0.09
## 4 0.08 Ijevan ARM 0.08
## 5 0.10 Lom BUL 0.10
## 6 0.09 Dubrovnik CRO 0.09
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for Europe using LD2 SNPs.",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Latitude_group | mean_fst_by_latitude | Subregion | mean_fst_by_region | Country | mean_fst_by_country | City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|---|---|
Above 40N | 0.10 | East Europe | 0.1 | Albania | 0.10 | Durres | ALD | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Albania | 0.10 | Tirana | TIR | 0.10 |
Above 40N | 0.10 | East Europe | 0.1 | Albania | 0.10 | Vlore | ALV | 0.09 |
Above 40N | 0.10 | East Europe | 0.1 | Armenia | 0.08 | Ijevan | ARM | 0.08 |
Above 40N | 0.10 | East Europe | 0.1 | Bulgaria | 0.10 | Lom | BUL | 0.10 |
Above 40N | 0.10 | East Europe | 0.1 | Croatia | 0.09 | Dubrovnik | CRO | 0.09 |
Above 40N | 0.10 | East Europe | 0.1 | Georgia | 0.11 | Sakhumi, Abkhazia | GES | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Romania | 0.11 | Satu Mare | ROS | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Armavir | RAR | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Krasnodar | KRA | 0.08 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Sochi | SOC | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Tikhoretsk | TIK | 0.13 |
Above 40N | 0.10 | East Europe | 0.1 | Serbia | 0.20 | Novi Sad | SER | 0.20 |
Above 40N | 0.10 | East Europe | 0.1 | Slovenia | 0.07 | Ajdovscina | SLO | 0.07 |
Above 40N | 0.10 | East Europe | 0.1 | Turkey | 0.09 | Hopa | TUH | 0.08 |
Above 40N | 0.10 | East Europe | 0.1 | Ukraine | 0.11 | Alushta | ALU | 0.10 |
Above 40N | 0.10 | East Europe | 0.1 | Ukraine | 0.11 | Kerch, Crimea | KER | 0.09 |
Above 40N | 0.10 | East Europe | 0.1 | Ukraine | 0.11 | Sevastopol, Crimea | SEV | 0.13 |
Above 40N | 0.10 | West Europe | 0.1 | France | 0.08 | Saint-Martin-d'Heres | FRS | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | France | 0.08 | Strasbourg | STS | 0.08 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Bologna | ITB | 0.10 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Brescia | BRE | 0.14 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Cesena | CES | 0.18 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Desenzano | DES | 0.13 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Imperia | IMP | 0.09 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Puglia | ITP | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Rome (Sapienza) | ROM | 0.08 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Rome (Trappola) | ITR | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Trentino | TRE | 0.06 |
Above 40N | 0.10 | West Europe | 0.1 | Portugal | 0.07 | Penafiel | POP | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | Spain | 0.10 | Barcelona | BAR | 0.11 |
Below 40N | 0.09 | East Europe | 0.1 | Greece | 0.09 | Athens | GRA | 0.09 |
Below 40N | 0.09 | East Europe | 0.1 | Greece | 0.09 | Chania | GRC | 0.09 |
Below 40N | 0.09 | East Europe | 0.1 | Turkey | 0.09 | Aliaga | TUA | 0.09 |
Below 40N | 0.09 | West Europe | 0.1 | Italy | 0.10 | Sicilia | SIC | 0.09 |
Below 40N | 0.09 | West Europe | 0.1 | Malta | 0.07 | Luqa | MAL | 0.07 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | Badajoz | SPB | 0.09 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | Catarroja | SPC | 0.10 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | Magaluf | SPM | 0.08 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | San Roque | SPS | 0.12 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here(
"output",
"europe",
"fst",
"fst_Europe_LD2_SNPS.docx"
)
# Save the Word document
print(doc, target = output_path)
To make scatter plot
# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
dplyr::group_by(Country) |>
dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))
# save the data
saveRDS(aggregated_data, here(
"output", "europe", "fst", "LD2_country.rds"
))
# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]
# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)
# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)
# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)
ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
geom_point(aes(color = Country), size = 3) +
geom_line(aes(y = fitted_values), color = "blue") + # Fitted line
labs(
title = "Mean Fst by Country",
x = "Ordered Countries",
y = "Mean Fst Value"
) +
scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "none")
Estimate distances
# Grab the population names from the matrix aa
populations_with_fst <- colnames(aa)
# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)
# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation
# Calculate the distances
for (i in 1:n) {
for (j in 1:n) {
if (i != j) {
coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
}
}
}
# Print the distance matrix
head(distance_matrix)
## FRS STS POP SPB SPS SPC BAR
## FRS 0.0000 412.1522 1225.4383 1263.7518 1371.4448 817.6702 511.8716
## STS 412.1522 0.0000 1509.1745 1601.1573 1750.4874 1213.5639 915.9614
## POP 1225.4383 1509.1745 0.0000 282.8409 614.5101 701.9619 878.2910
## SPB 1263.7518 1601.1573 282.8409 0.0000 331.7685 571.0509 827.0716
## SPS 1371.4448 1750.4874 614.5101 331.7685 0.0000 566.5153 874.4050
## SPC 817.6702 1213.5639 701.9619 571.0509 566.5153 0.0000 310.0074
## SPM IMP BRE DES TRE ITB CES
## FRS 683.8845 228.2822 349.1505 375.3876 427.8791 448.0871 524.5893
## STS 1095.6072 527.9620 389.6504 409.5459 380.8019 536.0256 604.0707
## POP 939.4811 1370.3898 1572.0008 1596.9006 1653.2807 1644.8695 1711.3013
## SPB 822.8201 1367.0861 1594.7230 1616.9360 1682.2286 1643.6694 1701.8380
## SPS 787.3372 1423.5497 1670.1969 1688.5822 1761.4648 1690.3169 1736.8728
## SPC 251.7726 857.0395 1104.4760 1122.4598 1195.9602 1124.6677 1173.2270
## ROM ITR SIC ITP MAL SLO CRO ALV
## FRS 654.1062 654.1062 1119.260 1004.474 1269.904 628.7258 1037.845 1234.162
## STS 834.5057 834.5057 1314.450 1098.126 1522.503 546.2343 1052.803 1296.908
## POP 1734.4434 1734.4434 2064.584 2102.566 2066.730 1851.8481 2197.100 2334.720
## SPB 1683.0686 1683.0686 1957.316 2040.301 1923.429 1869.2123 2157.626 2266.196
## SPS 1669.1413 1669.1413 1865.247 2002.327 1785.308 1929.6259 2144.409 2215.487
## SPC 1124.2692 1124.2692 1387.383 1474.434 1368.139 1363.2153 1600.981 1697.872
## ALD TIR SER GRA GRC ROS BUL TUA
## FRS 1192.210 1216.107 1100.2591 1709.174 1877.379 1341.427 1392.500 1884.818
## STS 1230.239 1248.235 988.6774 1763.684 1974.712 1124.852 1304.105 1885.544
## POP 2319.742 2346.521 2315.5228 2777.366 2876.039 2566.256 2591.385 3000.557
## SPB 2262.516 2289.859 2313.3538 2686.566 2760.206 2594.045 2569.099 2924.845
## SPS 2225.649 2253.121 2340.8167 2601.355 2642.916 2651.131 2568.416 2854.982
## SPC 1697.515 1724.957 1780.6744 2115.539 2193.176 2085.530 2019.617 2354.477
## TUH SEV ALU KER KRA SOC TIK RAR
## FRS 2897.030 2179.621 2245.512 2390.859 2596.424 2688.509 2660.271 2758.005
## STS 2745.357 2012.146 2071.666 2197.341 2400.939 2511.584 2447.357 2555.484
## POP 4103.736 3397.015 3464.308 3613.507 3818.891 3906.072 3885.080 3981.365
## SPB 4076.402 3387.944 3457.259 3614.231 3818.203 3894.554 3893.085 3982.756
## SPS 4053.790 3392.537 3463.561 3628.220 3828.858 3891.912 3913.323 3994.595
## SPC 3518.971 2842.701 2913.061 3074.730 3276.925 3346.038 3357.781 3442.342
## GES ARM
## FRS 2793.592 3207.499
## STS 2619.862 3045.177
## POP 4009.674 4417.211
## SPB 3995.220 4391.989
## SPS 3988.376 4369.793
## SPC 3444.704 3834.995
Compare distance and FST
# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]
# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0
# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))
data <- data |>
dplyr::arrange(
Distance
)
head(data)
## Distance FST row_index col_index
## 1 0 0 FRS FRS
## 2 0 0 STS STS
## 3 0 0 POP POP
## 4 0 0 SPB SPB
## 5 0 0 SPS SPS
## 6 0 0 SPC SPC
Fit linear regression
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14
)) # Increase font size for y-axis
## `geom_smooth()` using formula = 'y ~ x'
Select countries with at least 3 sampling localities in Europe
countries_with_3_pops <- filtered_sampling_loc %>%
group_by(Country) %>%
filter(n() >= 3) %>%
pull(Country) %>%
unique()
countries_with_3_pops
## [1] "Spain" "Italy" "Albania" "Ukraine" "Russia"
Do test for each country
results <- list()
for (country in countries_with_3_pops) {
# Extract abbreviations for the country
abbreviations <- filtered_sampling_loc %>%
filter(Country == country) %>%
pull(Abbreviation)
# Subset the data
subset_data <- data %>%
filter(row_index %in% abbreviations & col_index %in% abbreviations)
subset_data <- subset_data[subset_data$Distance > 0, ]
# Perform linear regression
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = subset_data)
results[[country]] <- list(
equation = sprintf("y = %.5fx + %.3f", coef(lm_model)[2], coef(lm_model)[1]),
r2 = sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
)
}
results
## $Spain
## $Spain$equation
## [1] "y = -0.01214x + 0.167"
##
## $Spain$r2
## [1] "R^2 = 0.01"
##
##
## $Italy
## $Italy$equation
## [1] "y = -0.01374x + 0.188"
##
## $Italy$r2
## [1] "R^2 = 0.03"
##
##
## $Albania
## $Albania$equation
## [1] "y = -0.01588x + 0.083"
##
## $Albania$r2
## [1] "R^2 = 0.50"
##
##
## $Ukraine
## $Ukraine$equation
## [1] "y = -0.02907x + 0.228"
##
## $Ukraine$r2
## [1] "R^2 = 0.90"
##
##
## $Russia
## $Russia$equation
## [1] "y = 0.01280x + -0.033"
##
## $Russia$r2
## [1] "R^2 = 0.02"
Merge the data
data_merged <- data %>%
left_join(filtered_sampling_loc[, c("Pop_City", "Country", "Abbreviation")], by = c("row_index" = "Abbreviation")) %>%
rename(Country1 = Country) %>%
left_join(filtered_sampling_loc[, c("Pop_City", "Country", "Abbreviation")], by = c("col_index" = "Abbreviation")) %>%
dplyr::select(-Pop_City.x, -Pop_City.y) %>%
filter(Country1 == Country) # Ensures the data is within the same country
# Filter to get the coutries with 3 or more sampling localities
countries_to_include <- c("Spain", "Italy", "Albania", "Ukraine", "Russia")
# Filter
data_filtered <- data_merged %>%
group_by(Country1) %>%
filter(n() >= 3 & Country1 %in% countries_to_include) %>%
ungroup()
Calculate linear regression for each country
regression_results <- data_filtered %>%
group_by(Country1) %>%
do(model = lm(FST/(1-FST) ~ log(Distance), data = .)) %>%
rowwise() %>%
mutate(equation = sprintf("italic(y) == %.3f * italic(x) + %.3f", coef(model)[2], coef(model)[1]),
r2 = sprintf("italic(R)^2 == %.2f", summary(model)$r.squared))
Plot it
ggplot(data_filtered, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ Country1, scales = "free", ncol = 2) +
geom_text(
data = regression_results,
aes(label = paste(equation, r2, sep = "~~")),
x = Inf, y = Inf,
vjust = 2, hjust = 1.2,
size = 3.5,
parse = TRUE,
inherit.aes = FALSE
) +
labs(title = "Fst vs Distance by Country",
x = "Log(Distance)",
y = "Fst(1-Fst)") +
scale_x_continuous(labels = scales::comma) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("output", "europe", "fst", "fst_by_distance_countries_Europe_LD2.pdf"),
width = 6,
height = 8,
units = "in"
)
We can merge the FST and distance matrices
# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)
# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]
# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names
# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
# Upper triangular - Fst values with two decimal places
merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
} else if (i > j) {
# Lower triangular - Distance values with zero decimal places
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Now the merged_matrix should be formatted as you need
print(merged_matrix)
## ALD ALU ALV ARM BAR BRE BUL CES CRO DES
## ALD NA "0.12" "0.02" "0.11" "0.12" "0.17" "0.11" "0.20" "0.05" "0.16"
## ALU "1268" NA "0.10" "0.06" "0.11" "0.16" "0.10" "0.19" "0.09" "0.14"
## ALV "93" "1307" NA "0.09" "0.10" "0.15" "0.09" "0.18" "0.03" "0.14"
## ARM "2144" "974" "2159" NA "0.11" "0.13" "0.09" "0.16" "0.08" "0.11"
## BAR "1446" "2631" "1458" "3566" NA "0.17" "0.11" "0.20" "0.10" "0.16"
## BRE "887" "1896" "942" "2859" "796" NA "0.15" "0.16" "0.15" "0.09"
## BUL "414" "895" "483" "1826" "1742" "1048" NA "0.19" "0.09" "0.14"
## CES "673" "1757" "724" "2706" "878" "223" "881" NA "0.18" "0.14"
## CRO "180" "1321" "260" "2235" "1333" "719" "428" "514" NA "0.13"
## DES "859" "1870" "915" "2832" "815" "28" "1021" "198" "691" NA
## FRS "1192" "2246" "1234" "3207" "512" "349" "1393" "525" "1038" "375"
## GES "1770" "550" "1795" "429" "3168" "2444" "1426" "2300" "1845" "2418"
## GRA "534" "1151" "476" "1848" "1900" "1417" "656" "1200" "711" "1390"
## GRC "754" "1348" "679" "1937" "2007" "1611" "925" "1389" "934" "1585"
## IMP "984" "2097" "1020" "3047" "552" "254" "1221" "341" "840" "268"
## ITB "752" "1821" "802" "2773" "824" "148" "950" "79" "591" "126"
## ITP "224" "1482" "235" "2367" "1227" "728" "604" "506" "201" "702"
## ITR "585" "1794" "605" "2711" "862" "445" "899" "252" "477" "427"
## KER "1443" "179" "1484" "862" "2789" "2042" "1062" "1913" "1490" "2017"
## KRA "1633" "366" "1670" "674" "2992" "2248" "1261" "2117" "1687" "2222"
## MAL "746" "1948" "674" "2719" "1233" "1135" "1157" "943" "817" "1119"
## POP "2320" "3464" "2335" "4417" "878" "1572" "2591" "1711" "2197" "1597"
## RAR "1798" "532" "1833" "560" "3157" "2410" "1427" "2281" "1853" "2385"
## ROM "585" "1794" "605" "2711" "862" "445" "899" "252" "477" "427"
## ROS "771" "950" "859" "1923" "1783" "1000" "445" "917" "683" "977"
## SER "442" "1149" "534" "2107" "1489" "752" "316" "612" "322" "725"
## SEV "1196" "72" "1235" "1034" "2561" "1830" "824" "1689" "1250" "1804"
## SIC "480" "1724" "420" "2545" "1195" "925" "894" "715" "537" "905"
## SLO "689" "1617" "761" "2581" "1059" "280" "782" "227" "509" "255"
## SOC "1678" "443" "1706" "538" "3068" "2339" "1327" "2197" "1748" "2313"
## SPB "2263" "3457" "2266" "4392" "827" "1595" "2569" "1702" "2158" "1617"
## SPC "1698" "2913" "1698" "3835" "310" "1104" "2020" "1173" "1601" "1122"
## SPM "1450" "2677" "1448" "3591" "211" "919" "1782" "956" "1360" "933"
## SPS "2226" "3464" "2215" "4370" "874" "1670" "2568" "1737" "2144" "1689"
## STS "1230" "2072" "1297" "3045" "916" "390" "1304" "604" "1053" "410"
## TIK "1733" "467" "1774" "687" "3069" "2313" "1350" "2191" "1778" "2288"
## TIR "27" "1242" "99" "2117" "1473" "908" "393" "696" "196" "881"
## TRE "858" "1821" "920" "2787" "887" "92" "986" "230" "684" "80"
## TUA "694" "904" "667" "1571" "2123" "1569" "641" "1363" "851" "1542"
## TUH "1828" "679" "1844" "316" "3251" "2549" "1512" "2393" "1919" "2522"
## FRS GES GRA GRC IMP ITB ITP ITR KER KRA
## ALD "0.09" "0.14" "0.08" "0.07" "0.12" "0.12" "0.08" "0.08" "0.12" "0.11"
## ALU "0.08" "0.08" "0.08" "0.09" "0.10" "0.11" "0.08" "0.07" "0.07" "0.05"
## ALV "0.07" "0.12" "0.06" "0.05" "0.08" "0.10" "0.06" "0.06" "0.10" "0.09"
## ARM "0.06" "0.05" "0.08" "0.08" "0.08" "0.09" "0.06" "0.07" "0.04" "0.02"
## BAR "0.09" "0.14" "0.09" "0.10" "0.11" "0.13" "0.08" "0.07" "0.12" "0.11"
## BRE "0.12" "0.18" "0.15" "0.15" "0.13" "0.12" "0.12" "0.14" "0.15" "0.15"
## BUL "0.07" "0.13" "0.09" "0.09" "0.09" "0.10" "0.06" "0.05" "0.11" "0.10"
## CES "0.15" "0.21" "0.18" "0.18" "0.18" "0.17" "0.15" "0.17" "0.18" "0.18"
## CRO "0.06" "0.12" "0.06" "0.05" "0.08" "0.09" "0.05" "0.05" "0.10" "0.09"
## DES "0.10" "0.17" "0.14" "0.14" "0.12" "0.11" "0.10" "0.12" "0.14" "0.14"
## FRS NA "0.11" "0.07" "0.07" "0.05" "0.06" "0.04" "0.04" "0.08" "0.08"
## GES "2794" NA "0.11" "0.12" "0.13" "0.14" "0.10" "0.10" "0.05" "0.02"
## GRA "1709" "1540" NA "0.02" "0.08" "0.09" "0.05" "0.05" "0.09" "0.08"
## GRC "1877" "1675" "270" NA "0.09" "0.10" "0.06" "0.06" "0.10" "0.09"
## IMP "228" "2640" "1492" "1653" NA "0.08" "0.05" "0.05" "0.09" "0.09"
## ITB "448" "2365" "1279" "1467" "277" NA "0.07" "0.07" "0.11" "0.11"
## ITP "1004" "1991" "705" "884" "787" "583" NA "0.02" "0.08" "0.07"
## ITR "654" "2321" "1070" "1224" "428" "302" "371" NA "0.08" "0.07"
## KER "2391" "434" "1327" "1517" "2250" "1973" "1655" "1960" NA "0.03"
## KRA "2596" "257" "1478" "1648" "2455" "2178" "1848" "2160" "206" NA
## MAL "1270" "2395" "873" "862" "1049" "996" "621" "694" "2127" "2301"
## POP "1225" "4010" "2777" "2876" "1370" "1645" "2103" "1734" "3614" "3819"
## RAR "2758" "210" "1629" "1789" "2619" "2342" "2014" "2326" "369" "166"
## ROM "654" "2321" "1070" "1224" "428" "302" "371" "0" "1960" "2160"
## ROS "1341" "1496" "1100" "1370" "1232" "961" "884" "1047" "1073" "1276"
## SER "1100" "1695" "884" "1141" "949" "672" "520" "697" "1301" "1506"
## SEV "2180" "614" "1086" "1288" "2029" "1753" "1410" "1723" "250" "438"
## SIC "1119" "2197" "736" "812" "891" "779" "340" "483" "1901" "2084"
## SLO "629" "2166" "1220" "1440" "507" "245" "585" "455" "1762" "1968"
## SOC "2689" "109" "1470" "1618" "2537" "2262" "1898" "2224" "325" "161"
## SPB "1264" "3995" "2687" "2760" "1367" "1644" "2040" "1683" "3614" "3818"
## SPC "818" "3445" "2116" "2193" "857" "1125" "1474" "1124" "3075" "3277"
## SPM "684" "3204" "1864" "1944" "665" "916" "1227" "883" "2841" "3042"
## SPS "1371" "3988" "2601" "2643" "1424" "1690" "2002" "1669" "3628" "3829"
## STS "412" "2620" "1764" "1975" "528" "536" "1098" "835" "2197" "2401"
## TIK "2660" "315" "1600" "1776" "2527" "2250" "1945" "2247" "290" "132"
## TIR "1216" "1743" "515" "742" "1009" "775" "251" "612" "1417" "1607"
## TRE "428" "2370" "1391" "1596" "346" "176" "717" "476" "1964" "2170"
## TUA "1885" "1265" "277" "445" "1678" "1441" "901" "1271" "1075" "1214"
## TUH "2897" "194" "1544" "1650" "2734" "2461" "2051" "2395" "597" "443"
## MAL POP RAR ROM ROS SER SEV SIC SLO SOC
## ALD "0.09" "0.09" "0.15" "0.10" "0.12" "0.22" "0.16" "0.11" "0.08" "0.14"
## ALU "0.07" "0.08" "0.07" "0.08" "0.11" "0.20" "0.10" "0.10" "0.07" "0.07"
## ALV "0.06" "0.07" "0.12" "0.07" "0.10" "0.18" "0.14" "0.09" "0.06" "0.12"
## ARM "0.06" "0.05" "0.05" "0.08" "0.09" "0.19" "0.07" "0.08" "0.05" "0.04"
## BAR "0.08" "0.09" "0.14" "0.08" "0.12" "0.22" "0.16" "0.10" "0.08" "0.13"
## BRE "0.12" "0.11" "0.18" "0.15" "0.13" "0.25" "0.19" "0.12" "0.10" "0.17"
## BUL "0.06" "0.07" "0.13" "0.07" "0.10" "0.20" "0.15" "0.08" "0.07" "0.13"
## CES "0.15" "0.13" "0.21" "0.18" "0.17" "0.28" "0.22" "0.16" "0.13" "0.20"
## CRO "0.06" "0.06" "0.12" "0.06" "0.10" "0.17" "0.13" "0.08" "0.06" "0.11"
## DES "0.11" "0.08" "0.17" "0.13" "0.11" "0.23" "0.18" "0.09" "0.08" "0.16"
## FRS "0.04" "0.04" "0.11" "0.04" "0.07" "0.16" "0.12" "0.06" "0.04" "0.10"
## GES "0.10" "0.10" "0.03" "0.11" "0.14" "0.23" "0.06" "0.12" "0.09" "0.01"
## GRA "0.05" "0.06" "0.11" "0.05" "0.10" "0.18" "0.12" "0.08" "0.05" "0.10"
## GRC "0.06" "0.07" "0.12" "0.06" "0.10" "0.19" "0.13" "0.08" "0.06" "0.11"
## IMP "0.05" "0.05" "0.13" "0.06" "0.10" "0.23" "0.15" "0.07" "0.06" "0.12"
## ITB "0.06" "0.05" "0.14" "0.08" "0.10" "0.22" "0.16" "0.08" "0.06" "0.13"
## ITP "0.02" "0.04" "0.11" "0.02" "0.08" "0.17" "0.12" "0.04" "0.04" "0.10"
## ITR "0.02" "0.03" "0.10" "0.01" "0.08" "0.17" "0.12" "0.05" "0.04" "0.10"
## KER "0.07" "0.07" "0.05" "0.09" "0.11" "0.20" "0.07" "0.10" "0.07" "0.05"
## KRA "0.07" "0.07" "0.01" "0.08" "0.10" "0.20" "0.03" "0.09" "0.06" "0.01"
## MAL NA "0.03" "0.10" "0.02" "0.07" "0.16" "0.12" "0.05" "0.04" "0.09"
## POP "2067" NA "0.11" "0.04" "0.07" "0.17" "0.12" "0.05" "0.04" "0.10"
## RAR "2460" "3981" NA "0.11" "0.14" "0.23" "0.05" "0.12" "0.10" "0.02"
## ROM "694" "1734" "2326" NA "0.09" "0.21" "0.13" "0.06" "0.04" "0.10"
## ROS "1498" "2566" "1432" "1047" NA "0.20" "0.15" "0.09" "0.06" "0.13"
## SER "1139" "2316" "1670" "697" "367" NA "0.25" "0.19" "0.15" "0.22"
## SEV "1878" "3397" "604" "1723" "894" "1082" NA "0.14" "0.11" "0.05"
## SIC "280" "2065" "2246" "483" "1219" "858" "1652" NA "0.05" "0.12"
## SLO "1118" "1852" "2130" "455" "726" "476" "1551" "865" NA "0.09"
## SOC "2316" "3906" "187" "2224" "1388" "1591" "509" "2113" "2060" NA
## SPB "1923" "283" "3983" "1683" "2594" "2313" "3388" "1957" "1869" "3895"
## SPC "1368" "702" "3442" "1124" "2086" "1781" "2843" "1387" "1363" "3346"
## SPM "1128" "939" "3208" "883" "1873" "1555" "2606" "1136" "1160" "3107"
## SPS "1785" "615" "3995" "1669" "2651" "2341" "3393" "1865" "1930" "3892"
## STS "1523" "1509" "2555" "835" "1125" "989" "2012" "1314" "546" "2512"
## TIK "2414" "3885" "127" "2247" "1328" "1580" "539" "2190" "2033" "253"
## TIR "764" "2347" "1772" "612" "761" "439" "1170" "502" "705" "1651"
## TRE "1170" "1653" "2331" "476" "915" "683" "1756" "945" "206" "2264"
## TUA "1148" "3001" "1361" "1271" "1057" "932" "843" "994" "1340" "1198"
## TUH "2412" "4104" "398" "2395" "1629" "1797" "734" "2233" "2272" "283"
## SPB SPC SPM SPS STS TIK TIR TRE TUA TUH
## ALD "0.12" "0.13" "0.10" "0.14" "0.10" "0.16" "0.03" "0.08" "0.10" "0.09"
## ALU "0.10" "0.11" "0.09" "0.12" "0.09" "0.09" "0.10" "0.07" "0.09" "0.08"
## ALV "0.09" "0.11" "0.08" "0.11" "0.08" "0.13" "-0.00" "0.06" "0.08" "0.07"
## ARM "0.06" "0.07" "0.06" "0.11" "0.07" "0.07" "0.09" "0.05" "0.09" "0.07"
## BAR "0.12" "0.13" "0.10" "0.11" "0.09" "0.15" "0.11" "0.08" "0.06" "0.05"
## BRE "0.10" "0.12" "0.11" "0.18" "0.12" "0.19" "0.17" "0.09" "0.16" "0.13"
## BUL "0.10" "0.11" "0.09" "0.12" "0.08" "0.15" "0.10" "0.06" "0.08" "0.07"
## CES "0.13" "0.15" "0.14" "0.21" "0.15" "0.22" "0.20" "0.11" "0.19" "0.16"
## CRO "0.09" "0.10" "0.08" "0.11" "0.07" "0.13" "0.02" "0.05" "0.08" "0.07"
## DES "0.08" "0.10" "0.09" "0.16" "0.11" "0.18" "0.15" "0.07" "0.14" "0.12"
## FRS "0.06" "0.07" "0.05" "0.09" "0.03" "0.12" "0.07" "0.03" "0.07" "0.05"
## GES "0.12" "0.13" "0.12" "0.15" "0.11" "0.05" "0.13" "0.10" "0.12" "0.11"
## GRA "0.09" "0.10" "0.08" "0.10" "0.07" "0.12" "0.06" "0.06" "0.07" "0.06"
## GRC "0.09" "0.10" "0.08" "0.11" "0.07" "0.13" "0.06" "0.06" "0.08" "0.07"
## IMP "0.07" "0.08" "0.07" "0.10" "0.05" "0.15" "0.10" "0.04" "0.08" "0.06"
## ITB "0.08" "0.09" "0.07" "0.13" "0.07" "0.16" "0.11" "0.04" "0.10" "0.08"
## ITP "0.07" "0.08" "0.05" "0.09" "0.04" "0.12" "0.06" "0.03" "0.06" "0.04"
## ITR "0.08" "0.09" "0.06" "0.07" "0.04" "0.11" "0.06" "0.04" "0.04" "0.04"
## KER "0.09" "0.10" "0.09" "0.13" "0.08" "0.07" "0.11" "0.07" "0.10" "0.09"
## KRA "0.09" "0.10" "0.08" "0.12" "0.08" "0.04" "0.10" "0.07" "0.09" "0.08"
## MAL "0.06" "0.08" "0.05" "0.07" "0.04" "0.11" "0.07" "0.03" "0.05" "0.04"
## POP "0.05" "0.06" "0.04" "0.09" "0.04" "0.12" "0.07" "0.03" "0.06" "0.05"
## RAR "0.13" "0.14" "0.12" "0.16" "0.11" "0.05" "0.14" "0.10" "0.12" "0.11"
## ROM "0.08" "0.10" "0.07" "0.09" "0.04" "0.13" "0.07" "0.03" "0.05" "0.04"
## ROS "0.09" "0.10" "0.09" "0.13" "0.07" "0.15" "0.11" "0.05" "0.10" "0.08"
## SER "0.20" "0.22" "0.21" "0.24" "0.18" "0.25" "0.22" "0.15" "0.20" "0.18"
## SEV "0.14" "0.15" "0.14" "0.17" "0.13" "0.08" "0.15" "0.11" "0.14" "0.12"
## SIC "0.08" "0.09" "0.06" "0.11" "0.06" "0.14" "0.09" "0.04" "0.08" "0.06"
## SLO "0.05" "0.06" "0.04" "0.09" "0.05" "0.11" "0.06" "0.02" "0.06" "0.05"
## SOC "0.11" "0.12" "0.11" "0.15" "0.11" "0.06" "0.13" "0.09" "0.12" "0.10"
## SPB NA "0.01" "0.01" "0.12" "0.06" "0.14" "0.10" "0.04" "0.10" "0.08"
## SPC "571" NA "0.02" "0.14" "0.07" "0.15" "0.11" "0.05" "0.10" "0.08"
## SPM "823" "252" NA "0.05" "0.05" "0.13" "0.10" "0.03" "0.08" "0.06"
## SPS "332" "567" "787" NA "0.10" "0.17" "0.13" "0.08" "0.03" "0.09"
## STS "1601" "1214" "1096" "1750" NA "0.12" "0.08" "0.03" "0.07" "0.05"
## TIK "3893" "3358" "3126" "3913" "2447" NA "0.15" "0.11" "0.13" "0.12"
## TIR "2290" "1725" "1478" "2253" "1248" "1707" NA "0.06" "0.08" "0.07"
## TRE "1682" "1196" "1011" "1761" "381" "2233" "877" NA "0.06" "0.04"
## TUA "2925" "2354" "2103" "2855" "1886" "1339" "669" "1529" NA "0.05"
## TUH "4076" "3519" "3275" "4054" "2745" "508" "1801" "2478" "1267" NA
cities <- readRDS(here("output", "sampling_loc_euro_global.rds"))
cities <- as_tibble(cities)
head(cities)
## # A tibble: 6 × 12
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Berlin, NJ USA 39.8 -74.9 Americas BER 2018 North Am…
## 2 Columbus, OH USA 40.0 -82.9 Americas COL 2015 North Am…
## 3 Palm Beach USA 26.7 -80.0 Americas PAL 2018 North Am…
## 4 Houston, TX USA 29.8 -95.4 Americas HOU 2018 North Am…
## 5 Los Angeles USA 34.1 -118. Americas LOS 2018 North Am…
## 6 Manaus, AM Brazil -3.09 -60.0 Americas MAU 2017 South Am…
## # ℹ 4 more variables: Subregion <chr>, order <int>, order2 <int>,
## # orderold <int>
We can sort by distance
# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)
# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))
# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]
# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
} else if (i > j) {
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Print the merged matrix
print(merged_matrix)
## CRO ALD ITP TIR SER ALV CES ITR ROM SLO
## CRO NA "0.05" "0.05" "0.02" "0.17" "0.03" "0.18" "0.05" "0.06" "0.06"
## ALD "180" NA "0.08" "0.03" "0.22" "0.02" "0.20" "0.08" "0.10" "0.08"
## ITP "201" "224" NA "0.06" "0.17" "0.06" "0.15" "0.02" "0.02" "0.04"
## TIR "196" "27" "251" NA "0.22" "0.00" "0.20" "0.06" "0.07" "0.06"
## SER "322" "442" "520" "439" NA "0.18" "0.28" "0.17" "0.21" "0.15"
## ALV "260" "93" "235" "99" "534" NA "0.18" "0.06" "0.07" "0.06"
## CES "514" "673" "506" "696" "612" "724" NA "0.17" "0.18" "0.13"
## ITR "477" "585" "371" "612" "697" "605" "252" NA "0.01" "0.04"
## ROM "477" "585" "371" "612" "697" "605" "252" "0" NA "0.04"
## SLO "509" "689" "585" "705" "476" "761" "227" "455" "455" NA
## ITB "591" "752" "583" "775" "672" "802" "79" "302" "302" "245"
## BUL "428" "414" "604" "393" "316" "483" "881" "899" "899" "782"
## DES "691" "859" "702" "881" "725" "915" "198" "427" "427" "255"
## TRE "684" "858" "717" "877" "683" "920" "230" "476" "476" "206"
## BRE "719" "887" "728" "908" "752" "942" "223" "445" "445" "280"
## SIC "537" "480" "340" "502" "858" "420" "715" "483" "483" "865"
## ROS "683" "771" "884" "761" "367" "859" "917" "1047" "1047" "726"
## IMP "840" "984" "787" "1009" "949" "1020" "341" "428" "428" "507"
## GRA "711" "534" "705" "515" "884" "476" "1200" "1070" "1070" "1220"
## TUA "851" "694" "901" "669" "932" "667" "1363" "1271" "1271" "1340"
## FRS "1038" "1192" "1004" "1216" "1100" "1234" "525" "654" "654" "629"
## MAL "817" "746" "621" "764" "1139" "674" "943" "694" "694" "1118"
## STS "1053" "1230" "1098" "1248" "989" "1297" "604" "835" "835" "546"
## GRC "934" "754" "884" "742" "1141" "679" "1389" "1224" "1224" "1440"
## SEV "1250" "1196" "1410" "1170" "1082" "1235" "1689" "1723" "1723" "1551"
## ALU "1321" "1268" "1482" "1242" "1149" "1307" "1757" "1794" "1794" "1617"
## BAR "1333" "1446" "1227" "1473" "1489" "1458" "878" "862" "862" "1059"
## SPM "1360" "1450" "1227" "1478" "1555" "1448" "956" "883" "883" "1160"
## KER "1490" "1443" "1655" "1417" "1301" "1484" "1913" "1960" "1960" "1762"
## KRA "1687" "1633" "1848" "1607" "1506" "1670" "2117" "2160" "2160" "1968"
## SPC "1601" "1698" "1474" "1725" "1781" "1698" "1173" "1124" "1124" "1363"
## SOC "1748" "1678" "1898" "1651" "1591" "1706" "2197" "2224" "2224" "2060"
## TIK "1778" "1733" "1945" "1707" "1580" "1774" "2191" "2247" "2247" "2033"
## GES "1845" "1770" "1991" "1743" "1695" "1795" "2300" "2321" "2321" "2166"
## RAR "1853" "1798" "2014" "1772" "1670" "1833" "2281" "2326" "2326" "2130"
## TUH "1919" "1828" "2051" "1801" "1797" "1844" "2393" "2395" "2395" "2272"
## ARM "2235" "2144" "2367" "2117" "2107" "2159" "2706" "2711" "2711" "2581"
## SPB "2158" "2263" "2040" "2290" "2313" "2266" "1702" "1683" "1683" "1869"
## SPS "2144" "2226" "2002" "2253" "2341" "2215" "1737" "1669" "1669" "1930"
## POP "2197" "2320" "2103" "2347" "2316" "2335" "1711" "1734" "1734" "1852"
## ITB BUL DES TRE BRE SIC ROS IMP GRA TUA
## CRO "0.09" "0.09" "0.13" "0.05" "0.15" "0.08" "0.10" "0.08" "0.06" "0.08"
## ALD "0.12" "0.11" "0.16" "0.08" "0.17" "0.11" "0.12" "0.12" "0.08" "0.10"
## ITP "0.07" "0.06" "0.10" "0.03" "0.12" "0.04" "0.08" "0.05" "0.05" "0.06"
## TIR "0.11" "0.10" "0.15" "0.06" "0.17" "0.09" "0.11" "0.10" "0.06" "0.08"
## SER "0.22" "0.20" "0.23" "0.15" "0.25" "0.19" "0.20" "0.23" "0.18" "0.20"
## ALV "0.10" "0.09" "0.14" "0.06" "0.15" "0.09" "0.10" "0.08" "0.06" "0.08"
## CES "0.17" "0.19" "0.14" "0.11" "0.16" "0.16" "0.17" "0.18" "0.18" "0.19"
## ITR "0.07" "0.05" "0.12" "0.04" "0.14" "0.05" "0.08" "0.05" "0.05" "0.04"
## ROM "0.08" "0.07" "0.13" "0.03" "0.15" "0.06" "0.09" "0.06" "0.05" "0.05"
## SLO "0.06" "0.07" "0.08" "0.02" "0.10" "0.05" "0.06" "0.06" "0.05" "0.06"
## ITB NA "0.10" "0.11" "0.04" "0.12" "0.08" "0.10" "0.08" "0.09" "0.10"
## BUL "950" NA "0.14" "0.06" "0.15" "0.08" "0.10" "0.09" "0.09" "0.08"
## DES "126" "1021" NA "0.07" "0.09" "0.09" "0.11" "0.12" "0.14" "0.14"
## TRE "176" "986" "80" NA "0.09" "0.04" "0.05" "0.04" "0.06" "0.06"
## BRE "148" "1048" "28" "92" NA "0.12" "0.13" "0.13" "0.15" "0.16"
## SIC "779" "894" "905" "945" "925" NA "0.09" "0.07" "0.08" "0.08"
## ROS "961" "445" "977" "915" "1000" "1219" NA "0.10" "0.10" "0.10"
## IMP "277" "1221" "268" "346" "254" "891" "1232" NA "0.08" "0.08"
## GRA "1279" "656" "1390" "1391" "1417" "736" "1100" "1492" NA "0.07"
## TUA "1441" "641" "1542" "1529" "1569" "994" "1057" "1678" "277" NA
## FRS "448" "1393" "375" "428" "349" "1119" "1341" "228" "1709" "1885"
## MAL "996" "1157" "1119" "1170" "1135" "280" "1498" "1049" "873" "1148"
## STS "536" "1304" "410" "381" "390" "1314" "1125" "528" "1764" "1886"
## GRC "1467" "925" "1585" "1596" "1611" "812" "1370" "1653" "270" "445"
## SEV "1753" "824" "1804" "1756" "1830" "1652" "894" "2029" "1086" "843"
## ALU "1821" "895" "1870" "1821" "1896" "1724" "950" "2097" "1151" "904"
## BAR "824" "1742" "815" "887" "796" "1195" "1783" "552" "1900" "2123"
## SPM "916" "1782" "933" "1011" "919" "1136" "1873" "665" "1864" "2103"
## KER "1973" "1062" "2017" "1964" "2042" "1901" "1073" "2250" "1327" "1075"
## KRA "2178" "1261" "2222" "2170" "2248" "2084" "1276" "2455" "1478" "1214"
## SPC "1125" "2020" "1122" "1196" "1104" "1387" "2086" "857" "2116" "2354"
## SOC "2262" "1327" "2313" "2264" "2339" "2113" "1388" "2537" "1470" "1198"
## TIK "2250" "1350" "2288" "2233" "2313" "2190" "1328" "2527" "1600" "1339"
## GES "2365" "1426" "2418" "2370" "2444" "2197" "1496" "2640" "1540" "1265"
## RAR "2342" "1427" "2385" "2331" "2410" "2246" "1432" "2619" "1629" "1361"
## TUH "2461" "1512" "2522" "2478" "2549" "2233" "1629" "2734" "1544" "1267"
## ARM "2773" "1826" "2832" "2787" "2859" "2545" "1923" "3047" "1848" "1571"
## SPB "1644" "2569" "1617" "1682" "1595" "1957" "2594" "1367" "2687" "2925"
## SPS "1690" "2568" "1689" "1761" "1670" "1865" "2651" "1424" "2601" "2855"
## POP "1645" "2591" "1597" "1653" "1572" "2065" "2566" "1370" "2777" "3001"
## FRS MAL STS GRC SEV ALU BAR SPM KER KRA
## CRO "0.06" "0.06" "0.07" "0.05" "0.13" "0.09" "0.10" "0.08" "0.10" "0.09"
## ALD "0.09" "0.09" "0.10" "0.07" "0.16" "0.12" "0.12" "0.10" "0.12" "0.11"
## ITP "0.04" "0.02" "0.04" "0.06" "0.12" "0.08" "0.08" "0.05" "0.08" "0.07"
## TIR "0.07" "0.07" "0.08" "0.06" "0.15" "0.10" "0.11" "0.10" "0.11" "0.10"
## SER "0.16" "0.16" "0.18" "0.19" "0.25" "0.20" "0.22" "0.21" "0.20" "0.20"
## ALV "0.07" "0.06" "0.08" "0.05" "0.14" "0.10" "0.10" "0.08" "0.10" "0.09"
## CES "0.15" "0.15" "0.15" "0.18" "0.22" "0.19" "0.20" "0.14" "0.18" "0.18"
## ITR "0.04" "0.02" "0.04" "0.06" "0.12" "0.07" "0.07" "0.06" "0.08" "0.07"
## ROM "0.04" "0.02" "0.04" "0.06" "0.13" "0.08" "0.08" "0.07" "0.09" "0.08"
## SLO "0.04" "0.04" "0.05" "0.06" "0.11" "0.07" "0.08" "0.04" "0.07" "0.06"
## ITB "0.06" "0.06" "0.07" "0.10" "0.16" "0.11" "0.13" "0.07" "0.11" "0.11"
## BUL "0.07" "0.06" "0.08" "0.09" "0.15" "0.10" "0.11" "0.09" "0.11" "0.10"
## DES "0.10" "0.11" "0.11" "0.14" "0.18" "0.14" "0.16" "0.09" "0.14" "0.14"
## TRE "0.03" "0.03" "0.03" "0.06" "0.11" "0.07" "0.08" "0.03" "0.07" "0.07"
## BRE "0.12" "0.12" "0.12" "0.15" "0.19" "0.16" "0.17" "0.11" "0.15" "0.15"
## SIC "0.06" "0.05" "0.06" "0.08" "0.14" "0.10" "0.10" "0.06" "0.10" "0.09"
## ROS "0.07" "0.07" "0.07" "0.10" "0.15" "0.11" "0.12" "0.09" "0.11" "0.10"
## IMP "0.05" "0.05" "0.05" "0.09" "0.15" "0.10" "0.11" "0.07" "0.09" "0.09"
## GRA "0.07" "0.05" "0.07" "0.02" "0.12" "0.08" "0.09" "0.08" "0.09" "0.08"
## TUA "0.07" "0.05" "0.07" "0.08" "0.14" "0.09" "0.06" "0.08" "0.10" "0.09"
## FRS NA "0.04" "0.03" "0.07" "0.12" "0.08" "0.09" "0.05" "0.08" "0.08"
## MAL "1270" NA "0.04" "0.06" "0.12" "0.07" "0.08" "0.05" "0.07" "0.07"
## STS "412" "1523" NA "0.07" "0.13" "0.09" "0.09" "0.05" "0.08" "0.08"
## GRC "1877" "862" "1975" NA "0.13" "0.09" "0.10" "0.08" "0.10" "0.09"
## SEV "2180" "1878" "2012" "1288" NA "0.10" "0.16" "0.14" "0.07" "0.03"
## ALU "2246" "1948" "2072" "1348" "72" NA "0.11" "0.09" "0.07" "0.05"
## BAR "512" "1233" "916" "2007" "2561" "2631" NA "0.10" "0.12" "0.11"
## SPM "684" "1128" "1096" "1944" "2606" "2677" "211" NA "0.09" "0.08"
## KER "2391" "2127" "2197" "1517" "250" "179" "2789" "2841" NA "0.03"
## KRA "2596" "2301" "2401" "1648" "438" "366" "2992" "3042" "206" NA
## SPC "818" "1368" "1214" "2193" "2843" "2913" "310" "252" "3075" "3277"
## SOC "2689" "2316" "2512" "1618" "509" "443" "3068" "3107" "325" "161"
## TIK "2660" "2414" "2447" "1776" "539" "467" "3069" "3126" "290" "132"
## GES "2794" "2395" "2620" "1675" "614" "550" "3168" "3204" "434" "257"
## RAR "2758" "2460" "2555" "1789" "604" "532" "3157" "3208" "369" "166"
## TUH "2897" "2412" "2745" "1650" "734" "679" "3251" "3275" "597" "443"
## ARM "3207" "2719" "3045" "1937" "1034" "974" "3566" "3591" "862" "674"
## SPB "1264" "1923" "1601" "2760" "3388" "3457" "827" "823" "3614" "3818"
## SPS "1371" "1785" "1750" "2643" "3393" "3464" "874" "787" "3628" "3829"
## POP "1225" "2067" "1509" "2876" "3397" "3464" "878" "939" "3614" "3819"
## SPC SOC TIK GES RAR TUH ARM SPB SPS POP
## CRO "0.10" "0.11" "0.13" "0.12" "0.12" "0.07" "0.08" "0.09" "0.11" "0.06"
## ALD "0.13" "0.14" "0.16" "0.14" "0.15" "0.09" "0.11" "0.12" "0.14" "0.09"
## ITP "0.08" "0.10" "0.12" "0.10" "0.11" "0.04" "0.06" "0.07" "0.09" "0.04"
## TIR "0.11" "0.13" "0.15" "0.13" "0.14" "0.07" "0.09" "0.10" "0.13" "0.07"
## SER "0.22" "0.22" "0.25" "0.23" "0.23" "0.18" "0.19" "0.20" "0.24" "0.17"
## ALV "0.11" "0.12" "0.13" "0.12" "0.12" "0.07" "0.09" "0.09" "0.11" "0.07"
## CES "0.15" "0.20" "0.22" "0.21" "0.21" "0.16" "0.16" "0.13" "0.21" "0.13"
## ITR "0.09" "0.10" "0.11" "0.10" "0.10" "0.04" "0.07" "0.08" "0.07" "0.03"
## ROM "0.10" "0.10" "0.13" "0.11" "0.11" "0.04" "0.08" "0.08" "0.09" "0.04"
## SLO "0.06" "0.09" "0.11" "0.09" "0.10" "0.05" "0.05" "0.05" "0.09" "0.04"
## ITB "0.09" "0.13" "0.16" "0.14" "0.14" "0.08" "0.09" "0.08" "0.13" "0.05"
## BUL "0.11" "0.13" "0.15" "0.13" "0.13" "0.07" "0.09" "0.10" "0.12" "0.07"
## DES "0.10" "0.16" "0.18" "0.17" "0.17" "0.12" "0.11" "0.08" "0.16" "0.08"
## TRE "0.05" "0.09" "0.11" "0.10" "0.10" "0.04" "0.05" "0.04" "0.08" "0.03"
## BRE "0.12" "0.17" "0.19" "0.18" "0.18" "0.13" "0.13" "0.10" "0.18" "0.11"
## SIC "0.09" "0.12" "0.14" "0.12" "0.12" "0.06" "0.08" "0.08" "0.11" "0.05"
## ROS "0.10" "0.13" "0.15" "0.14" "0.14" "0.08" "0.09" "0.09" "0.13" "0.07"
## IMP "0.08" "0.12" "0.15" "0.13" "0.13" "0.06" "0.08" "0.07" "0.10" "0.05"
## GRA "0.10" "0.10" "0.12" "0.11" "0.11" "0.06" "0.08" "0.09" "0.10" "0.06"
## TUA "0.10" "0.12" "0.13" "0.12" "0.12" "0.05" "0.09" "0.10" "0.03" "0.06"
## FRS "0.07" "0.10" "0.12" "0.11" "0.11" "0.05" "0.06" "0.06" "0.09" "0.04"
## MAL "0.08" "0.09" "0.11" "0.10" "0.10" "0.04" "0.06" "0.06" "0.07" "0.03"
## STS "0.07" "0.11" "0.12" "0.11" "0.11" "0.05" "0.07" "0.06" "0.10" "0.04"
## GRC "0.10" "0.11" "0.13" "0.12" "0.12" "0.07" "0.08" "0.09" "0.11" "0.07"
## SEV "0.15" "0.05" "0.08" "0.06" "0.05" "0.12" "0.07" "0.14" "0.17" "0.12"
## ALU "0.11" "0.07" "0.09" "0.08" "0.07" "0.08" "0.06" "0.10" "0.12" "0.08"
## BAR "0.13" "0.13" "0.15" "0.14" "0.14" "0.05" "0.11" "0.12" "0.11" "0.09"
## SPM "0.02" "0.11" "0.13" "0.12" "0.12" "0.06" "0.06" "0.01" "0.05" "0.04"
## KER "0.10" "0.05" "0.07" "0.05" "0.05" "0.09" "0.04" "0.09" "0.13" "0.07"
## KRA "0.10" "0.01" "0.04" "0.02" "0.01" "0.08" "0.02" "0.09" "0.12" "0.07"
## SPC NA "0.12" "0.15" "0.13" "0.14" "0.08" "0.07" "0.01" "0.14" "0.06"
## SOC "3346" NA "0.06" "0.01" "0.02" "0.10" "0.04" "0.11" "0.15" "0.10"
## TIK "3358" "253" NA "0.05" "0.05" "0.12" "0.07" "0.14" "0.17" "0.12"
## GES "3445" "109" "315" NA "0.03" "0.11" "0.05" "0.12" "0.15" "0.10"
## RAR "3442" "187" "127" "210" NA "0.11" "0.05" "0.13" "0.16" "0.11"
## TUH "3519" "283" "508" "194" "398" NA "0.07" "0.08" "0.09" "0.05"
## ARM "3835" "538" "687" "429" "560" "316" NA "0.06" "0.11" "0.05"
## SPB "571" "3895" "3893" "3995" "3983" "4076" "4392" NA "0.12" "0.05"
## SPS "567" "3892" "3913" "3988" "3995" "4054" "4370" "332" NA "0.09"
## POP "702" "3906" "3885" "4010" "3981" "4104" "4417" "283" "615" NA
Make a table and save it as a word document
# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)
# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]
merged_df1 <- as.data.frame(merged_df)
write.csv(merged_df1, "/gpfs/gibbs/pi/caccone/mkc54/albo/scripts/RMarkdowns/output/europe/fst/merged_df.csv")
# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df))
ft
Population | CRO | ALD | ITP | TIR | SER | ALV | CES | ITR | ROM | SLO | ITB | BUL | DES | TRE | BRE | SIC | ROS | IMP | GRA | TUA | FRS | MAL | STS | GRC | SEV | ALU | BAR | SPM | KER | KRA | SPC | SOC | TIK | GES | RAR | TUH | ARM | SPB | SPS | POP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRO | 0.05 | 0.05 | 0.02 | 0.17 | 0.03 | 0.18 | 0.05 | 0.06 | 0.06 | 0.09 | 0.09 | 0.13 | 0.05 | 0.15 | 0.08 | 0.10 | 0.08 | 0.06 | 0.08 | 0.06 | 0.06 | 0.07 | 0.05 | 0.13 | 0.09 | 0.10 | 0.08 | 0.10 | 0.09 | 0.10 | 0.11 | 0.13 | 0.12 | 0.12 | 0.07 | 0.08 | 0.09 | 0.11 | 0.06 | |
ALD | 180 | 0.08 | 0.03 | 0.22 | 0.02 | 0.20 | 0.08 | 0.10 | 0.08 | 0.12 | 0.11 | 0.16 | 0.08 | 0.17 | 0.11 | 0.12 | 0.12 | 0.08 | 0.10 | 0.09 | 0.09 | 0.10 | 0.07 | 0.16 | 0.12 | 0.12 | 0.10 | 0.12 | 0.11 | 0.13 | 0.14 | 0.16 | 0.14 | 0.15 | 0.09 | 0.11 | 0.12 | 0.14 | 0.09 | |
ITP | 201 | 224 | 0.06 | 0.17 | 0.06 | 0.15 | 0.02 | 0.02 | 0.04 | 0.07 | 0.06 | 0.10 | 0.03 | 0.12 | 0.04 | 0.08 | 0.05 | 0.05 | 0.06 | 0.04 | 0.02 | 0.04 | 0.06 | 0.12 | 0.08 | 0.08 | 0.05 | 0.08 | 0.07 | 0.08 | 0.10 | 0.12 | 0.10 | 0.11 | 0.04 | 0.06 | 0.07 | 0.09 | 0.04 | |
TIR | 196 | 27 | 251 | 0.22 | 0.00 | 0.20 | 0.06 | 0.07 | 0.06 | 0.11 | 0.10 | 0.15 | 0.06 | 0.17 | 0.09 | 0.11 | 0.10 | 0.06 | 0.08 | 0.07 | 0.07 | 0.08 | 0.06 | 0.15 | 0.10 | 0.11 | 0.10 | 0.11 | 0.10 | 0.11 | 0.13 | 0.15 | 0.13 | 0.14 | 0.07 | 0.09 | 0.10 | 0.13 | 0.07 | |
SER | 322 | 442 | 520 | 439 | 0.18 | 0.28 | 0.17 | 0.21 | 0.15 | 0.22 | 0.20 | 0.23 | 0.15 | 0.25 | 0.19 | 0.20 | 0.23 | 0.18 | 0.20 | 0.16 | 0.16 | 0.18 | 0.19 | 0.25 | 0.20 | 0.22 | 0.21 | 0.20 | 0.20 | 0.22 | 0.22 | 0.25 | 0.23 | 0.23 | 0.18 | 0.19 | 0.20 | 0.24 | 0.17 | |
ALV | 260 | 93 | 235 | 99 | 534 | 0.18 | 0.06 | 0.07 | 0.06 | 0.10 | 0.09 | 0.14 | 0.06 | 0.15 | 0.09 | 0.10 | 0.08 | 0.06 | 0.08 | 0.07 | 0.06 | 0.08 | 0.05 | 0.14 | 0.10 | 0.10 | 0.08 | 0.10 | 0.09 | 0.11 | 0.12 | 0.13 | 0.12 | 0.12 | 0.07 | 0.09 | 0.09 | 0.11 | 0.07 | |
CES | 514 | 673 | 506 | 696 | 612 | 724 | 0.17 | 0.18 | 0.13 | 0.17 | 0.19 | 0.14 | 0.11 | 0.16 | 0.16 | 0.17 | 0.18 | 0.18 | 0.19 | 0.15 | 0.15 | 0.15 | 0.18 | 0.22 | 0.19 | 0.20 | 0.14 | 0.18 | 0.18 | 0.15 | 0.20 | 0.22 | 0.21 | 0.21 | 0.16 | 0.16 | 0.13 | 0.21 | 0.13 | |
ITR | 477 | 585 | 371 | 612 | 697 | 605 | 252 | 0.01 | 0.04 | 0.07 | 0.05 | 0.12 | 0.04 | 0.14 | 0.05 | 0.08 | 0.05 | 0.05 | 0.04 | 0.04 | 0.02 | 0.04 | 0.06 | 0.12 | 0.07 | 0.07 | 0.06 | 0.08 | 0.07 | 0.09 | 0.10 | 0.11 | 0.10 | 0.10 | 0.04 | 0.07 | 0.08 | 0.07 | 0.03 | |
ROM | 477 | 585 | 371 | 612 | 697 | 605 | 252 | 0 | 0.04 | 0.08 | 0.07 | 0.13 | 0.03 | 0.15 | 0.06 | 0.09 | 0.06 | 0.05 | 0.05 | 0.04 | 0.02 | 0.04 | 0.06 | 0.13 | 0.08 | 0.08 | 0.07 | 0.09 | 0.08 | 0.10 | 0.10 | 0.13 | 0.11 | 0.11 | 0.04 | 0.08 | 0.08 | 0.09 | 0.04 | |
SLO | 509 | 689 | 585 | 705 | 476 | 761 | 227 | 455 | 455 | 0.06 | 0.07 | 0.08 | 0.02 | 0.10 | 0.05 | 0.06 | 0.06 | 0.05 | 0.06 | 0.04 | 0.04 | 0.05 | 0.06 | 0.11 | 0.07 | 0.08 | 0.04 | 0.07 | 0.06 | 0.06 | 0.09 | 0.11 | 0.09 | 0.10 | 0.05 | 0.05 | 0.05 | 0.09 | 0.04 | |
ITB | 591 | 752 | 583 | 775 | 672 | 802 | 79 | 302 | 302 | 245 | 0.10 | 0.11 | 0.04 | 0.12 | 0.08 | 0.10 | 0.08 | 0.09 | 0.10 | 0.06 | 0.06 | 0.07 | 0.10 | 0.16 | 0.11 | 0.13 | 0.07 | 0.11 | 0.11 | 0.09 | 0.13 | 0.16 | 0.14 | 0.14 | 0.08 | 0.09 | 0.08 | 0.13 | 0.05 | |
BUL | 428 | 414 | 604 | 393 | 316 | 483 | 881 | 899 | 899 | 782 | 950 | 0.14 | 0.06 | 0.15 | 0.08 | 0.10 | 0.09 | 0.09 | 0.08 | 0.07 | 0.06 | 0.08 | 0.09 | 0.15 | 0.10 | 0.11 | 0.09 | 0.11 | 0.10 | 0.11 | 0.13 | 0.15 | 0.13 | 0.13 | 0.07 | 0.09 | 0.10 | 0.12 | 0.07 | |
DES | 691 | 859 | 702 | 881 | 725 | 915 | 198 | 427 | 427 | 255 | 126 | 1021 | 0.07 | 0.09 | 0.09 | 0.11 | 0.12 | 0.14 | 0.14 | 0.10 | 0.11 | 0.11 | 0.14 | 0.18 | 0.14 | 0.16 | 0.09 | 0.14 | 0.14 | 0.10 | 0.16 | 0.18 | 0.17 | 0.17 | 0.12 | 0.11 | 0.08 | 0.16 | 0.08 | |
TRE | 684 | 858 | 717 | 877 | 683 | 920 | 230 | 476 | 476 | 206 | 176 | 986 | 80 | 0.09 | 0.04 | 0.05 | 0.04 | 0.06 | 0.06 | 0.03 | 0.03 | 0.03 | 0.06 | 0.11 | 0.07 | 0.08 | 0.03 | 0.07 | 0.07 | 0.05 | 0.09 | 0.11 | 0.10 | 0.10 | 0.04 | 0.05 | 0.04 | 0.08 | 0.03 | |
BRE | 719 | 887 | 728 | 908 | 752 | 942 | 223 | 445 | 445 | 280 | 148 | 1048 | 28 | 92 | 0.12 | 0.13 | 0.13 | 0.15 | 0.16 | 0.12 | 0.12 | 0.12 | 0.15 | 0.19 | 0.16 | 0.17 | 0.11 | 0.15 | 0.15 | 0.12 | 0.17 | 0.19 | 0.18 | 0.18 | 0.13 | 0.13 | 0.10 | 0.18 | 0.11 | |
SIC | 537 | 480 | 340 | 502 | 858 | 420 | 715 | 483 | 483 | 865 | 779 | 894 | 905 | 945 | 925 | 0.09 | 0.07 | 0.08 | 0.08 | 0.06 | 0.05 | 0.06 | 0.08 | 0.14 | 0.10 | 0.10 | 0.06 | 0.10 | 0.09 | 0.09 | 0.12 | 0.14 | 0.12 | 0.12 | 0.06 | 0.08 | 0.08 | 0.11 | 0.05 | |
ROS | 683 | 771 | 884 | 761 | 367 | 859 | 917 | 1047 | 1047 | 726 | 961 | 445 | 977 | 915 | 1000 | 1219 | 0.10 | 0.10 | 0.10 | 0.07 | 0.07 | 0.07 | 0.10 | 0.15 | 0.11 | 0.12 | 0.09 | 0.11 | 0.10 | 0.10 | 0.13 | 0.15 | 0.14 | 0.14 | 0.08 | 0.09 | 0.09 | 0.13 | 0.07 | |
IMP | 840 | 984 | 787 | 1009 | 949 | 1020 | 341 | 428 | 428 | 507 | 277 | 1221 | 268 | 346 | 254 | 891 | 1232 | 0.08 | 0.08 | 0.05 | 0.05 | 0.05 | 0.09 | 0.15 | 0.10 | 0.11 | 0.07 | 0.09 | 0.09 | 0.08 | 0.12 | 0.15 | 0.13 | 0.13 | 0.06 | 0.08 | 0.07 | 0.10 | 0.05 | |
GRA | 711 | 534 | 705 | 515 | 884 | 476 | 1200 | 1070 | 1070 | 1220 | 1279 | 656 | 1390 | 1391 | 1417 | 736 | 1100 | 1492 | 0.07 | 0.07 | 0.05 | 0.07 | 0.02 | 0.12 | 0.08 | 0.09 | 0.08 | 0.09 | 0.08 | 0.10 | 0.10 | 0.12 | 0.11 | 0.11 | 0.06 | 0.08 | 0.09 | 0.10 | 0.06 | |
TUA | 851 | 694 | 901 | 669 | 932 | 667 | 1363 | 1271 | 1271 | 1340 | 1441 | 641 | 1542 | 1529 | 1569 | 994 | 1057 | 1678 | 277 | 0.07 | 0.05 | 0.07 | 0.08 | 0.14 | 0.09 | 0.06 | 0.08 | 0.10 | 0.09 | 0.10 | 0.12 | 0.13 | 0.12 | 0.12 | 0.05 | 0.09 | 0.10 | 0.03 | 0.06 | |
FRS | 1038 | 1192 | 1004 | 1216 | 1100 | 1234 | 525 | 654 | 654 | 629 | 448 | 1393 | 375 | 428 | 349 | 1119 | 1341 | 228 | 1709 | 1885 | 0.04 | 0.03 | 0.07 | 0.12 | 0.08 | 0.09 | 0.05 | 0.08 | 0.08 | 0.07 | 0.10 | 0.12 | 0.11 | 0.11 | 0.05 | 0.06 | 0.06 | 0.09 | 0.04 | |
MAL | 817 | 746 | 621 | 764 | 1139 | 674 | 943 | 694 | 694 | 1118 | 996 | 1157 | 1119 | 1170 | 1135 | 280 | 1498 | 1049 | 873 | 1148 | 1270 | 0.04 | 0.06 | 0.12 | 0.07 | 0.08 | 0.05 | 0.07 | 0.07 | 0.08 | 0.09 | 0.11 | 0.10 | 0.10 | 0.04 | 0.06 | 0.06 | 0.07 | 0.03 | |
STS | 1053 | 1230 | 1098 | 1248 | 989 | 1297 | 604 | 835 | 835 | 546 | 536 | 1304 | 410 | 381 | 390 | 1314 | 1125 | 528 | 1764 | 1886 | 412 | 1523 | 0.07 | 0.13 | 0.09 | 0.09 | 0.05 | 0.08 | 0.08 | 0.07 | 0.11 | 0.12 | 0.11 | 0.11 | 0.05 | 0.07 | 0.06 | 0.10 | 0.04 | |
GRC | 934 | 754 | 884 | 742 | 1141 | 679 | 1389 | 1224 | 1224 | 1440 | 1467 | 925 | 1585 | 1596 | 1611 | 812 | 1370 | 1653 | 270 | 445 | 1877 | 862 | 1975 | 0.13 | 0.09 | 0.10 | 0.08 | 0.10 | 0.09 | 0.10 | 0.11 | 0.13 | 0.12 | 0.12 | 0.07 | 0.08 | 0.09 | 0.11 | 0.07 | |
SEV | 1250 | 1196 | 1410 | 1170 | 1082 | 1235 | 1689 | 1723 | 1723 | 1551 | 1753 | 824 | 1804 | 1756 | 1830 | 1652 | 894 | 2029 | 1086 | 843 | 2180 | 1878 | 2012 | 1288 | 0.10 | 0.16 | 0.14 | 0.07 | 0.03 | 0.15 | 0.05 | 0.08 | 0.06 | 0.05 | 0.12 | 0.07 | 0.14 | 0.17 | 0.12 | |
ALU | 1321 | 1268 | 1482 | 1242 | 1149 | 1307 | 1757 | 1794 | 1794 | 1617 | 1821 | 895 | 1870 | 1821 | 1896 | 1724 | 950 | 2097 | 1151 | 904 | 2246 | 1948 | 2072 | 1348 | 72 | 0.11 | 0.09 | 0.07 | 0.05 | 0.11 | 0.07 | 0.09 | 0.08 | 0.07 | 0.08 | 0.06 | 0.10 | 0.12 | 0.08 | |
BAR | 1333 | 1446 | 1227 | 1473 | 1489 | 1458 | 878 | 862 | 862 | 1059 | 824 | 1742 | 815 | 887 | 796 | 1195 | 1783 | 552 | 1900 | 2123 | 512 | 1233 | 916 | 2007 | 2561 | 2631 | 0.10 | 0.12 | 0.11 | 0.13 | 0.13 | 0.15 | 0.14 | 0.14 | 0.05 | 0.11 | 0.12 | 0.11 | 0.09 | |
SPM | 1360 | 1450 | 1227 | 1478 | 1555 | 1448 | 956 | 883 | 883 | 1160 | 916 | 1782 | 933 | 1011 | 919 | 1136 | 1873 | 665 | 1864 | 2103 | 684 | 1128 | 1096 | 1944 | 2606 | 2677 | 211 | 0.09 | 0.08 | 0.02 | 0.11 | 0.13 | 0.12 | 0.12 | 0.06 | 0.06 | 0.01 | 0.05 | 0.04 | |
KER | 1490 | 1443 | 1655 | 1417 | 1301 | 1484 | 1913 | 1960 | 1960 | 1762 | 1973 | 1062 | 2017 | 1964 | 2042 | 1901 | 1073 | 2250 | 1327 | 1075 | 2391 | 2127 | 2197 | 1517 | 250 | 179 | 2789 | 2841 | 0.03 | 0.10 | 0.05 | 0.07 | 0.05 | 0.05 | 0.09 | 0.04 | 0.09 | 0.13 | 0.07 | |
KRA | 1687 | 1633 | 1848 | 1607 | 1506 | 1670 | 2117 | 2160 | 2160 | 1968 | 2178 | 1261 | 2222 | 2170 | 2248 | 2084 | 1276 | 2455 | 1478 | 1214 | 2596 | 2301 | 2401 | 1648 | 438 | 366 | 2992 | 3042 | 206 | 0.10 | 0.01 | 0.04 | 0.02 | 0.01 | 0.08 | 0.02 | 0.09 | 0.12 | 0.07 | |
SPC | 1601 | 1698 | 1474 | 1725 | 1781 | 1698 | 1173 | 1124 | 1124 | 1363 | 1125 | 2020 | 1122 | 1196 | 1104 | 1387 | 2086 | 857 | 2116 | 2354 | 818 | 1368 | 1214 | 2193 | 2843 | 2913 | 310 | 252 | 3075 | 3277 | 0.12 | 0.15 | 0.13 | 0.14 | 0.08 | 0.07 | 0.01 | 0.14 | 0.06 | |
SOC | 1748 | 1678 | 1898 | 1651 | 1591 | 1706 | 2197 | 2224 | 2224 | 2060 | 2262 | 1327 | 2313 | 2264 | 2339 | 2113 | 1388 | 2537 | 1470 | 1198 | 2689 | 2316 | 2512 | 1618 | 509 | 443 | 3068 | 3107 | 325 | 161 | 3346 | 0.06 | 0.01 | 0.02 | 0.10 | 0.04 | 0.11 | 0.15 | 0.10 | |
TIK | 1778 | 1733 | 1945 | 1707 | 1580 | 1774 | 2191 | 2247 | 2247 | 2033 | 2250 | 1350 | 2288 | 2233 | 2313 | 2190 | 1328 | 2527 | 1600 | 1339 | 2660 | 2414 | 2447 | 1776 | 539 | 467 | 3069 | 3126 | 290 | 132 | 3358 | 253 | 0.05 | 0.05 | 0.12 | 0.07 | 0.14 | 0.17 | 0.12 | |
GES | 1845 | 1770 | 1991 | 1743 | 1695 | 1795 | 2300 | 2321 | 2321 | 2166 | 2365 | 1426 | 2418 | 2370 | 2444 | 2197 | 1496 | 2640 | 1540 | 1265 | 2794 | 2395 | 2620 | 1675 | 614 | 550 | 3168 | 3204 | 434 | 257 | 3445 | 109 | 315 | 0.03 | 0.11 | 0.05 | 0.12 | 0.15 | 0.10 | |
RAR | 1853 | 1798 | 2014 | 1772 | 1670 | 1833 | 2281 | 2326 | 2326 | 2130 | 2342 | 1427 | 2385 | 2331 | 2410 | 2246 | 1432 | 2619 | 1629 | 1361 | 2758 | 2460 | 2555 | 1789 | 604 | 532 | 3157 | 3208 | 369 | 166 | 3442 | 187 | 127 | 210 | 0.11 | 0.05 | 0.13 | 0.16 | 0.11 | |
TUH | 1919 | 1828 | 2051 | 1801 | 1797 | 1844 | 2393 | 2395 | 2395 | 2272 | 2461 | 1512 | 2522 | 2478 | 2549 | 2233 | 1629 | 2734 | 1544 | 1267 | 2897 | 2412 | 2745 | 1650 | 734 | 679 | 3251 | 3275 | 597 | 443 | 3519 | 283 | 508 | 194 | 398 | 0.07 | 0.08 | 0.09 | 0.05 | |
ARM | 2235 | 2144 | 2367 | 2117 | 2107 | 2159 | 2706 | 2711 | 2711 | 2581 | 2773 | 1826 | 2832 | 2787 | 2859 | 2545 | 1923 | 3047 | 1848 | 1571 | 3207 | 2719 | 3045 | 1937 | 1034 | 974 | 3566 | 3591 | 862 | 674 | 3835 | 538 | 687 | 429 | 560 | 316 | 0.06 | 0.11 | 0.05 | |
SPB | 2158 | 2263 | 2040 | 2290 | 2313 | 2266 | 1702 | 1683 | 1683 | 1869 | 1644 | 2569 | 1617 | 1682 | 1595 | 1957 | 2594 | 1367 | 2687 | 2925 | 1264 | 1923 | 1601 | 2760 | 3388 | 3457 | 827 | 823 | 3614 | 3818 | 571 | 3895 | 3893 | 3995 | 3983 | 4076 | 4392 | 0.12 | 0.05 | |
SPS | 2144 | 2226 | 2002 | 2253 | 2341 | 2215 | 1737 | 1669 | 1669 | 1930 | 1690 | 2568 | 1689 | 1761 | 1670 | 1865 | 2651 | 1424 | 2601 | 2855 | 1371 | 1785 | 1750 | 2643 | 3393 | 3464 | 874 | 787 | 3628 | 3829 | 567 | 3892 | 3913 | 3988 | 3995 | 4054 | 4370 | 332 | 0.09 | |
POP | 2197 | 2320 | 2103 | 2347 | 2316 | 2335 | 1711 | 1734 | 1734 | 1852 | 1645 | 2591 | 1597 | 1653 | 1572 | 2065 | 2566 | 1370 | 2777 | 3001 | 1225 | 2067 | 1509 | 2876 | 3397 | 3464 | 878 | 939 | 3614 | 3819 | 702 | 3906 | 3885 | 4010 | 3981 | 4104 | 4417 | 283 | 615 |
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.1 \
--keep-fam output/fst/pops_4fst.txt \
--make-bed \
--out output/fst/mantel \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel.log
47484 variants loaded from .bim file. –keep-fam: 407 people remaining. Total genotyping rate in remaining samples is 0.97243. 47484 variants and 407 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/mantel \
--recodeA \
--out output/fst/mantel \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel.log
47484 variants loaded from .bim file. 47484 variants and 407 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
Load it
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation
## 1 Franceville Gabon -1.59207 13.53242 Africa GAB
## 2 Antananarivo Madagascar -18.87920 47.50790 Africa ANT
## 3 Diego ville Madagascar -12.27361 49.29372 Africa DGV
## 4 Morondava Madagascar -20.28420 44.27940 Africa MAD
## 5 Vohimasy Madagascar -22.81591 47.75026 Africa VOH
## 6 Dauguet Mauritius -20.18530 57.52154 Africa DAU
## Year Region Subregion order
## 1 2015 Central Africa Africa 72
## 2 2022 East Africa East Africa 76
## 3 2022 East Africa East Africa 77
## 4 2016 East Africa East Africa 78
## 5 2016 & 2017 East Africa East Africa 79
## 6 2022 Indian Ocean Indian Ocean 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# # Replace the BEN a with BEN b (remember to never name samples with the same ID... I change it manually in the fam file.)
# fam_data <- fam_data %>%
# mutate(IndividualID = ifelse(FamilyID == "BEN" & IndividualID == "a", "b", IndividualID))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 SOC 1065 0 0 0 -9 Sochi Russia
## 2 SOC 1066 0 0 0 -9 Sochi Russia
## 3 SOC 1067 0 0 0 -9 Sochi Russia
## 4 SOC 1068 0 0 0 -9 Sochi Russia
## 5 SOC 1069 0 0 0 -9 Sochi Russia
## 6 SOC 1070 0 0 0 -9 Sochi Russia
## Latitude Longitude Continent Year Region Subregion order
## 1 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 2 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 3 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 4 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 5 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 6 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
Get the latitude and longitude
## [,1] [,2]
## [1,] 43.60042 39.74533
## [2,] 43.60042 39.74533
## [3,] 43.60042 39.74533
## [4,] 43.60042 39.74533
## [5,] 43.60042 39.74533
## [6,] 43.60042 39.74533
Add jitter
## x y
## [1,] 43.60204 39.74689
## [2,] 43.60159 39.74539
## [3,] 43.59905 39.74583
## [4,] 43.59910 39.74705
## [5,] 43.59873 39.74606
## [6,] 43.59864 39.74720
Add to object
Save
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: -0.0106724
##
## Based on 999 replicates
## Simulated p-value: 0.551
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.167827661 0.004410054 0.008076388
Simulated p-value is not statistically signficant. Alternative hypothesis: greater. The test was one-sided, checking if the observed correlation is greater than what would be expected by chance.
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "simIBD_europe_LD2.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "Genetic_v_Geog_distance_europe_LD2_1.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
dev.off()
Add the equation
# Plotting the data
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
# You can adjust the position (e.g., x and y values) as necessary
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "Genetic_v_Geog_distance_europe_LD2.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
# You can adjust the position (e.g., x and y values) as necessary
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
CairoPDF(here(
"output", "europe", "fst", "IDB_PlotFromMASS_density_1.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
png(here("output", "europe", "fst", "ibd2_1.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Check the populations - I did not include those with less than 4 mosquitoes
## ALD ALU ALV ARM BAR BRE BUL CES CRO DES FRS GES GRA GRC IMP ITB ITP ITR KER KRA
## 10 12 12 10 12 13 10 14 12 16 12 12 11 10 4 5 8 12 12 12
## MAL POP RAR ROM ROS SER SEV SIC SLO SOC SPB SPC SPM SPS STS TIK TIR TRE TUA TUH
## 12 12 12 4 11 4 12 9 12 12 8 6 5 8 12 12 4 12 9 12
Create list of populations
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
awk '{print $1}' output/snps_sets/r2_0.01.fam | sort | uniq -c | awk '{print $2, $1}' | awk '$2 >= 4 {print}' | awk '{print $1}' > output/fst/pops_4fst.txt;
head output/fst/pops_4fst.txt;
wc -l output/fst/pops_4fst.txt
## ALD
## ALU
## ALV
## ARM
## BAR
## BRE
## BUL
## CES
## CRO
## DES
## 40 output/fst/pops_4fst.txt
We have 40 populations with 4 or more mosquitoes.
First load plink
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01 \
--keep-fam output/fst/pops_4fst.txt \
--recodeA \
--out output/fst/r2_0.01 \
--silent;
grep 'samples\|variants\|remaining' output/fst/r2_0.01.log
17028 variants loaded from .bim file. –keep-fam: 407 people remaining. Total genotyping rate in remaining samples is 0.970158. 17028 variants and 407 people pass filters and QC.
Look at https://rdrr.io/cran/StAMPP/man/stamppFst.html for details of Fst estimations
LD2 <-
read.PLINK(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/r2_0.01.raw"
),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Length Class Mode
## 1 genlight S4
The Europe+Asia dataset kept crashing R Studio when running the next step, so we will try running it directly in the R module in McCleary Install StAMPP for direct use in the R module in the cluster
salloc
module load R/4.2.0-foss-2020b
R
#install.packages("StAMPP", repos="http://cran.r-project.org")
srun -p ycga --pty -N 1 -n 1 -c 4 bash
#salloc -p ycga -t 2:00:00 -c 5
#srun -p ycga --pty bash #start an interactive session and load R packages needed
R
library(StAMPP)
library(adegenet)
library(here)
LD2 <-
read.PLINK(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/r2_0.01.raw"
),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
summary(LD2)
#The command below would also work, but you can simplify it and put only the numbers: genome_equal_2 <- stamppFst(neutral, nboots=100, percent=95 + nclusters==10)
#This chunk will take a couple minutes to run.
LD2_2 <- stamppConvert(LD2, type="genlight")
LD2_3 <- stamppFst(LD2_2, 1, 95, 1)
saveRDS(
LD2_3, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/r_01/LD2_01.rds"
)
)
To load it
Now lets look at the object
## SOC SEV GES KER
## Min. :0.007311 Min. :0.02992 Min. :0.01736 Min. :0.03118
## 1st Qu.:0.096319 1st Qu.:0.11754 1st Qu.:0.10369 1st Qu.:0.07739
## Median :0.111864 Median :0.13548 Median :0.12121 Median :0.09315
## Mean :0.107863 Mean :0.13222 Mean :0.11844 Mean :0.09748
## 3rd Qu.:0.128105 3rd Qu.:0.15325 3rd Qu.:0.13510 3rd Qu.:0.10958
## Max. :0.225734 Max. :0.24709 Max. :0.23191 Max. :0.20274
## NA's :1 NA's :2 NA's :3 NA's :4
## KRA TIK RAR TRE
## Min. :0.01111 Min. :0.05284 Min. :0.05298 Min. :0.02280
## 1st Qu.:0.07491 1st Qu.:0.12123 1st Qu.:0.11076 1st Qu.:0.03519
## Median :0.09108 Median :0.13647 Median :0.12539 Median :0.05345
## Mean :0.09209 Mean :0.13920 Mean :0.13079 Mean :0.05612
## 3rd Qu.:0.10552 3rd Qu.:0.15373 3rd Qu.:0.14289 3rd Qu.:0.06377
## Max. :0.19543 Max. :0.24869 Max. :0.23378 Max. :0.15172
## NA's :5 NA's :6 NA's :7 NA's :8
## ALU STS SIC BRE
## Min. :0.06427 Min. :0.02997 Min. :0.04444 Min. :0.08688
## 1st Qu.:0.07946 1st Qu.:0.05088 1st Qu.:0.06078 1st Qu.:0.12055
## Median :0.09406 Median :0.06915 Median :0.08174 Median :0.13740
## Mean :0.10162 Mean :0.07324 Mean :0.08448 Mean :0.14092
## 3rd Qu.:0.10823 3rd Qu.:0.07785 3rd Qu.:0.09264 3rd Qu.:0.15603
## Max. :0.19589 Max. :0.17344 Max. :0.19149 Max. :0.24786
## NA's :9 NA's :10 NA's :11 NA's :12
## DES CES TIR IMP
## Min. :0.07757 Min. :0.1314 Min. :-0.000645 Min. :0.04306
## 1st Qu.:0.10679 1st Qu.:0.1518 1st Qu.: 0.063331 1st Qu.:0.05707
## Median :0.12593 Median :0.1746 Median : 0.073437 Median :0.08053
## Mean :0.12750 Mean :0.1751 Mean : 0.082908 Mean :0.08087
## 3rd Qu.:0.14060 3rd Qu.:0.1875 3rd Qu.: 0.100416 3rd Qu.:0.08657
## Max. :0.22813 Max. :0.2837 Max. : 0.221505 Max. :0.22494
## NA's :13 NA's :14 NA's :15 NA's :16
## ROM GRC BAR BUL
## Min. :0.005747 Min. :0.02423 Min. :0.04388 Min. :0.05313
## 1st Qu.:0.044609 1st Qu.:0.06052 1st Qu.:0.08151 1st Qu.:0.06765
## Median :0.065980 Median :0.07806 Median :0.09771 Median :0.08864
## Mean :0.066735 Mean :0.08185 Mean :0.10234 Mean :0.09014
## 3rd Qu.:0.080996 3rd Qu.:0.09858 3rd Qu.:0.12127 3rd Qu.:0.09997
## Max. :0.206208 Max. :0.18863 Max. :0.21578 Max. :0.19385
## NA's :17 NA's :18 NA's :19 NA's :20
## CRO GRA ITB MAL
## Min. :0.02783 Min. :0.04929 Min. :0.05313 Min. :0.01794
## 1st Qu.:0.05626 1st Qu.:0.05828 1st Qu.:0.07026 1st Qu.:0.03869
## Median :0.06515 Median :0.07547 Median :0.08330 Median :0.05604
## Mean :0.07561 Mean :0.08066 Mean :0.09315 Mean :0.05880
## 3rd Qu.:0.09137 3rd Qu.:0.09714 3rd Qu.:0.10416 3rd Qu.:0.07361
## Max. :0.17206 Max. :0.18532 Max. :0.22078 Max. :0.16004
## NA's :21 NA's :22 NA's :23 NA's :24
## SPM TUA TUH ALD
## Min. :0.006871 Min. :0.02829 Min. :0.03447 Min. :0.02244
## 1st Qu.:0.045630 1st Qu.:0.05807 1st Qu.:0.04586 1st Qu.:0.08179
## Median :0.056524 Median :0.07420 Median :0.06887 Median :0.09939
## Mean :0.066728 Mean :0.08066 Mean :0.07247 Mean :0.10639
## 3rd Qu.:0.081846 3rd Qu.:0.09925 3rd Qu.:0.08466 3rd Qu.:0.12546
## Max. :0.209145 Max. :0.19709 Max. :0.17094 Max. :0.21518
## NA's :25 NA's :26 NA's :27 NA's :28
## FRS ITP POP ROS
## Min. :0.03904 Min. :0.02335 Min. :0.03484 Min. :0.05978
## 1st Qu.:0.04250 1st Qu.:0.04222 1st Qu.:0.05135 1st Qu.:0.08975
## Median :0.06191 Median :0.06744 Median :0.05485 Median :0.09692
## Mean :0.06874 Mean :0.07105 Mean :0.07033 Mean :0.10801
## 3rd Qu.:0.07393 3rd Qu.:0.07974 3rd Qu.:0.07401 3rd Qu.:0.11011
## Max. :0.16317 Max. :0.17352 Max. :0.16544 Max. :0.20168
## NA's :29 NA's :30 NA's :31 NA's :32
## SER SLO SPC SPB
## Min. :0.1519 Min. :0.04098 Min. :0.00762 Min. :0.05969
## 1st Qu.:0.1729 1st Qu.:0.05351 1st Qu.:0.06937 1st Qu.:0.07454
## Median :0.1879 Median :0.05713 Median :0.08597 Median :0.08747
## Mean :0.1915 Mean :0.06075 Mean :0.08068 Mean :0.09051
## 3rd Qu.:0.2094 3rd Qu.:0.06211 3rd Qu.:0.10543 3rd Qu.:0.10344
## Max. :0.2363 Max. :0.09319 Max. :0.13499 Max. :0.12741
## NA's :33 NA's :34 NA's :35 NA's :36
## SPS ARM ALV ITR
## Min. :0.07167 Min. :0.06895 Min. :0.05727 Min. : NA
## 1st Qu.:0.09287 1st Qu.:0.07346 1st Qu.:0.05727 1st Qu.: NA
## Median :0.11407 Median :0.07797 Median :0.05727 Median : NA
## Mean :0.10048 Mean :0.07797 Mean :0.05727 Mean :NaN
## 3rd Qu.:0.11488 3rd Qu.:0.08248 3rd Qu.:0.05727 3rd Qu.: NA
## Max. :0.11569 Max. :0.08699 Max. :0.05727 Max. : NA
## NA's :37 NA's :38 NA's :39 NA's :40
If you want you can save the fst values as csv.
# Save it
write.csv(LD2_df, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/r_01/LD2_df_01.csv"))
Check the Fst values
## SOC SEV GES KER KRA TIK RAR TRE ALU STS
## SOC NA NA NA NA NA NA NA NA NA NA
## SEV 0.045340580 NA NA NA NA NA NA NA NA NA
## GES 0.011051225 0.05398193 NA NA NA NA NA NA NA NA
## KER 0.046624742 0.06797261 0.05431325 NA NA NA NA NA NA NA
## KRA 0.007310457 0.02992473 0.01736366 0.03118331 NA NA NA NA NA NA
## TIK 0.058308330 0.07951033 0.05326679 0.07204451 0.03550992 NA NA NA NA NA
## SIC BRE DES CES TIR IMP ROM GRC BAR BUL CRO GRA ITB MAL SPM TUA TUH ALD FRS
## SOC NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## SEV NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## KER NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## KRA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## TIK NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## ITP POP ROS SER SLO SPC SPB SPS ARM ALV ITR
## SOC NA NA NA NA NA NA NA NA NA NA NA
## SEV NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA
## KER NA NA NA NA NA NA NA NA NA NA NA
## KRA NA NA NA NA NA NA NA NA NA NA NA
## TIK NA NA NA NA NA NA NA NA NA NA NA
Convert the data into a matrix.
## SOC SEV GES KER KRA TIK
## SOC NA 0.04534058 0.01105123 0.04662474 0.007310457 0.05830833
## SEV 0.045340580 NA 0.05398193 0.06797261 0.029924734 0.07951033
## GES 0.011051225 0.05398193 NA 0.05431325 0.017363660 0.05326679
## KER 0.046624742 0.06797261 0.05431325 NA 0.031183311 0.07204451
## KRA 0.007310457 0.02992473 0.01736366 0.03118331 NA 0.03550992
## TIK 0.058308330 0.07951033 0.05326679 0.07204451 0.035509919 NA
## RAR TRE ALU STS SIC BRE DES
## SOC 0.02140308 0.09774556 0.06956851 0.10725685 0.11842581 0.1778315 0.1664752
## SEV 0.05274907 0.11676360 0.09533467 0.12805521 0.14109059 0.1957159 0.1853356
## GES 0.02845986 0.10228847 0.08536454 0.11152269 0.12403531 0.1804918 0.1701758
## KER 0.05335175 0.07223555 0.06478502 0.08210551 0.09791612 0.1522506 0.1390027
## KRA 0.01110483 0.07237471 0.05408914 0.08351473 0.09244042 0.1509169 0.1405311
## TIK 0.05283903 0.11486839 0.09035828 0.12571084 0.13863956 0.1938497 0.1815638
## CES TIR IMP ROM GRC BAR BUL
## SOC 0.2084034 0.1279906 0.12014266 0.10598720 0.11737403 0.1362259 0.1282197
## SEV 0.2266013 0.1533911 0.14660265 0.13150060 0.13483143 0.1584720 0.1494195
## GES 0.2108559 0.1350974 0.12700509 0.11104200 0.12349279 0.1437363 0.1344213
## KER 0.1801340 0.1099683 0.09105621 0.08616772 0.10043376 0.1237754 0.1084714
## KRA 0.1811980 0.1019753 0.09219167 0.08079209 0.09064014 0.1118393 0.1028093
## TIK 0.2231552 0.1547583 0.14573453 0.13191149 0.13556816 0.1551659 0.1486248
## CRO GRA ITB MAL SPM TUA TUH
## SOC 0.11548159 0.10812974 0.1344973 0.09509352 0.11186385 0.11843156 0.10440110
## SEV 0.13612178 0.12816215 0.1586964 0.11731416 0.13942614 0.13961729 0.12321527
## GES 0.12130010 0.11394608 0.1405141 0.10187843 0.12078634 0.12484530 0.11211883
## KER 0.09946183 0.09385646 0.1094535 0.07532346 0.08876307 0.10298389 0.08472508
## KRA 0.09226819 0.08358153 0.1082221 0.07264223 0.08298896 0.09156695 0.07825535
## TIK 0.13369582 0.12732836 0.1581451 0.11588489 0.13491594 0.13768571 0.12231274
## ALD FRS ITP POP ROS SER SLO
## SOC 0.1400475 0.10249166 0.10147814 0.09916795 0.1343887 0.2257335 0.09200842
## SEV 0.1604408 0.12125322 0.12308452 0.12072078 0.1550944 0.2470881 0.11079811
## GES 0.1455015 0.10774189 0.10719453 0.10568851 0.1398834 0.2319092 0.09786065
## KER 0.1249852 0.07807626 0.08041833 0.07279555 0.1105760 0.2027373 0.07278478
## KRA 0.1144149 0.07603818 0.07526203 0.07455165 0.1082726 0.1954302 0.06545358
## TIK 0.1606478 0.12086388 0.12263965 0.11939275 0.1506513 0.2486889 0.10862989
## SPC SPB SPS ARM ALV ITR
## SOC 0.12467311 0.11528409 0.1505581 0.04473196 0.11895717 0.09754439
## SEV 0.15283101 0.14107609 0.1730141 0.07087941 0.14016159 0.11822902
## GES 0.13281801 0.12120594 0.1588711 0.05626477 0.12520599 0.10368592
## KER 0.10120180 0.09243961 0.1323221 0.03946515 0.10268353 0.07931754
## KRA 0.09829134 0.09108052 0.1251237 0.01848469 0.09600472 0.07323132
## TIK 0.14678514 0.13736995 0.1708804 0.06769401 0.13827773 0.11770930
Import sample locations
sampling_loc <- readRDS(here("output", "sampling_loc_all.rds"))
# Arrange by region
sampling_loc <- sampling_loc |>
dplyr::arrange(
order
)
# Check it
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Loule Portugal 37.09084 -8.092465 Europe POL 2017
## 5 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 6 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## Region Subregion order
## 1 Western Europe West Europe 1
## 2 Western Europe West Europe 2
## 3 Southern Europe West Europe 3
## 4 Southern Europe West Europe 4
## 5 Southern Europe West Europe 5
## 6 Southern Europe West Europe 6
Order
## [1] "FRS" "STS" "POP" "POL" "SPB" "SPS" "SPC" "BAR" "SPM" "IMP" "ITG" "BRE"
## [13] "DES" "TRE" "ITB" "CES" "ROM" "ITR" "SIC" "ITP" "MAL" "SLO" "CRO" "ALV"
## [25] "ALD" "TIR" "SER" "GRA" "GRC" "ROS" "BUL" "TUA" "TUH" "SEV" "ALU" "KER"
## [37] "KRA" "SOC" "TIK" "RAR" "GES" "ARM" "KAN" "UTS" "KAG" "OKI" "HAI" "YUN"
## [49] "HUN" "TAI" "GEL" "BEN" "KUN" "KAT" "JAF" "CAM" "SUF" "SUU" "INW" "INJ"
## [61] "KLP" "MAT" "SSK" "KAC" "SON" "CHA" "LAM" "HAN" "HOC" "QNC" "RAB" "GAB"
## [73] "LIB" "YAO" "AWK" "ANT" "DGV" "MAD" "VOH" "DAU" "TRO" "BRM" "JAM" "SAI"
## [85] "BEA" "BER" "CHI" "COL" "DAL" "FAY" "HOU" "LOS" "MAC" "MAN" "NEO" "NEW"
## [97] "NUE" "PAL" "PEO" "RUS" "SPR" "POR" "GRV" "MAU" "NOV" "REC" "TUC" "MED"
## [109] "AIZ" "HIR" "KHO" "KYO" "NAG" "NIG" "SAG" "SAK" "SEN" "TAN" "JAT" "YAT"
Create vector with order of populations
# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df)
# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]
#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it
# Print the reordered populations
print(poporder)
## [1] "SOC" "SEV" "GES" "KER" "KRA" "TIK" "RAR" "TRE" "ALU" "STS" "SIC" "BRE"
## [13] "DES" "CES" "TIR" "IMP" "ROM" "GRC" "BAR" "BUL" "CRO" "GRA" "ITB" "MAL"
## [25] "SPM" "TUA" "TUH" "ALD" "FRS" "ITP" "POP" "ROS" "SER" "SLO" "SPC" "SPB"
## [37] "SPS" "ARM" "ALV" "ITR"
Lets check if the matrix is symmetric.
## [1] TRUE
Order the matrix using poporder. We will also add NA on the upper left side of the matrix.
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## SOC : 40 SOC : 40 Min. :-0.0006
## SEV : 40 SEV : 40 1st Qu.: 0.0660
## GES : 40 GES : 40 Median : 0.0936
## KER : 40 KER : 40 Mean : 0.0999
## KRA : 40 KRA : 40 3rd Qu.: 0.1258
## TIK : 40 TIK : 40 Max. : 0.2837
## (Other):1360 (Other):1360 NA's :820
Now lets plot the data with ggplot. You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f <- ggplot(pairfst.long, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 2) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 0)
)
pairfst.f
Save it
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
# Step 2: Calculate mean Fst for each pair of countries
# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(LD2_df))
fst_df$Abbreviation1 <- rownames(fst_df)
# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)
# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))
# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>%
group_by(Country_1, Country_2) %>%
summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>%
filter(Country_1 != Country_2)
# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)
# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)
for(i in 1:nrow(fst_matrix_summary)) {
for(j in i:nrow(fst_matrix_summary)) {
if (i == j) {
symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
} else {
avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
symmetric_fst_matrix[i, j] <- avg_value
symmetric_fst_matrix[j, i] <- avg_value
}
}
}
# Check if the matrix is symmetric
# print(isSymmetric(symmetric_fst_matrix))
# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
## Albania Armenia Bulgaria Croatia France Georgia
## Albania NA 0.09321744 0.10071586 0.02902128 0.08171049 0.13526830
## Armenia 0.09321744 NA 0.09453274 0.08402086 0.06488566 0.05626477
## Bulgaria 0.10071586 0.09453274 NA 0.08659803 0.07492555 0.13442128
## Croatia 0.02902128 0.08402086 0.08659803 NA 0.06325525 0.12130010
## France 0.08171049 0.06488566 0.07492555 0.06325525 NA 0.10963229
## Georgia 0.13526830 0.05626477 0.13442128 0.12130010 0.10963229 NA
## Greece 0.06256676 0.08098370 0.09271700 0.05530008 0.07048226 0.11871943
## Italy 0.10031787 0.08125198 0.09058315 0.08446013 0.07069720 0.13772888
## Malta 0.06943936 0.06208794 0.05647669 0.05515396 0.03900198 0.10187843
## Portugal 0.07568869 0.05432709 0.06598036 0.06468746 0.04262535 0.10568851
## Romania 0.11177139 0.09437558 0.10276302 0.10008786 0.07513883 0.13988343
## Russia 0.13048892 0.04597386 0.12935100 0.11613403 0.10534007 0.02204067
## Serbia 0.20037488 0.18791125 0.19385358 0.17206143 0.16830584 0.23190916
## Slovenia 0.06605789 0.05327576 0.06528185 0.05737246 0.04394329 0.09786065
## Spain 0.10936323 0.08179381 0.10542292 0.09378821 0.07178449 0.13548354
## Turkey 0.07977018 0.07764824 0.07567729 0.07033617 0.06034639 0.11848206
## Ukraine 0.12284139 0.05820559 0.12083319 0.10949639 0.09577193 0.06191042
## Greece Italy Malta Portugal Romania Russia
## Albania 0.06256676 0.10031787 0.06943936 0.07568869 0.11177139 0.13048892
## Armenia 0.08098370 0.08125198 0.06208794 0.05432709 0.09437558 0.04597386
## Bulgaria 0.09271700 0.09058315 0.05647669 0.06598036 0.10276302 0.12935100
## Croatia 0.05530008 0.08446013 0.05515396 0.06468746 0.10008786 0.11613403
## France 0.07048226 0.07069720 0.03900198 0.04262535 0.07513883 0.10534007
## Georgia 0.11871943 0.13772888 0.10187843 0.10568851 0.13988343 0.02204067
## Greece NA 0.08950131 0.05519872 0.06808207 0.10204372 0.11265341
## Italy 0.08950131 NA 0.04627918 0.04934449 0.09222338 0.13368546
## Malta 0.05519872 0.04627918 NA 0.02989389 0.07421546 0.09726957
## Portugal 0.06808207 0.04934449 0.02989389 NA 0.07400939 0.10088585
## Romania 0.10204372 0.09222338 0.07421546 0.07400939 NA 0.13405182
## Russia 0.11265341 0.13368546 0.09726957 0.10088585 0.13405182 NA
## Serbia 0.18697367 0.18879959 0.16004224 0.16544015 0.20168208 0.22590842
## Slovenia 0.05673457 0.05219771 0.03919840 0.03612759 0.05977806 0.09144282
## Spain 0.09397200 0.09029722 0.07212531 0.06579523 0.10718238 0.12934461
## Turkey 0.07171085 0.07023434 0.04526383 0.05569789 0.09184709 0.11147059
## Ukraine 0.10485926 0.09974544 0.08771344 0.09057183 0.12625042 0.05832179
## Serbia Slovenia Spain Turkey Ukraine
## Albania 0.2003749 0.06605789 0.10936323 0.07977018 0.12284139
## Armenia 0.1879113 0.05327576 0.08179381 0.07764824 0.05820559
## Bulgaria 0.1938536 0.06528185 0.10542292 0.07567729 0.12083319
## Croatia 0.1720614 0.05737246 0.09378821 0.07033617 0.10949639
## France 0.1683058 0.04394329 0.07178449 0.06034639 0.09577193
## Georgia 0.2319092 0.09786065 0.13548354 0.11848206 0.06191042
## Greece 0.1869737 0.05673457 0.09397200 0.07171085 0.10485926
## Italy 0.1887996 0.05219771 0.09029722 0.07023434 0.09974544
## Malta 0.1600422 0.03919840 0.07212531 0.04526383 0.08771344
## Portugal 0.1654401 0.03612759 0.06579523 0.05569789 0.09057183
## Romania 0.2016821 0.05977806 0.10718238 0.09184709 0.12625042
## Russia 0.2259084 0.09144282 0.12934461 0.11147059 0.05832179
## Serbia NA 0.15188779 0.21540658 0.18401508 0.21523980
## Slovenia 0.1518878 NA 0.06515292 0.05208143 0.08333507
## Spain 0.2154066 0.06515292 NA 0.07099299 0.12195514
## Turkey 0.1840151 0.05208143 0.07099299 NA 0.10371389
## Ukraine 0.2152398 0.08333507 0.12195514 0.10371389 NA
## Albania Armenia Bulgaria Croatia France Georgia
## Albania NA 0.09321744 0.10071586 0.02902128 0.08171049 0.13526830
## Armenia NA NA 0.09453274 0.08402086 0.06488566 0.05626477
## Bulgaria NA NA NA 0.08659803 0.07492555 0.13442128
## Croatia NA NA NA NA 0.06325525 0.12130010
## France NA NA NA NA NA 0.10963229
## Georgia NA NA NA NA NA NA
## Greece NA NA NA NA NA NA
## Italy NA NA NA NA NA NA
## Malta NA NA NA NA NA NA
## Portugal NA NA NA NA NA NA
## Romania NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA
## Slovenia NA NA NA NA NA NA
## Spain NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Ukraine NA NA NA NA NA NA
## Greece Italy Malta Portugal Romania Russia
## Albania 0.06256676 0.10031787 0.06943936 0.07568869 0.11177139 0.13048892
## Armenia 0.08098370 0.08125198 0.06208794 0.05432709 0.09437558 0.04597386
## Bulgaria 0.09271700 0.09058315 0.05647669 0.06598036 0.10276302 0.12935100
## Croatia 0.05530008 0.08446013 0.05515396 0.06468746 0.10008786 0.11613403
## France 0.07048226 0.07069720 0.03900198 0.04262535 0.07513883 0.10534007
## Georgia 0.11871943 0.13772888 0.10187843 0.10568851 0.13988343 0.02204067
## Greece NA 0.08950131 0.05519872 0.06808207 0.10204372 0.11265341
## Italy NA NA 0.04627918 0.04934449 0.09222338 0.13368546
## Malta NA NA NA 0.02989389 0.07421546 0.09726957
## Portugal NA NA NA NA 0.07400939 0.10088585
## Romania NA NA NA NA NA 0.13405182
## Russia NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA
## Slovenia NA NA NA NA NA NA
## Spain NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Ukraine NA NA NA NA NA NA
## Serbia Slovenia Spain Turkey Ukraine
## Albania 0.2003749 0.06605789 0.10936323 0.07977018 0.12284139
## Armenia 0.1879113 0.05327576 0.08179381 0.07764824 0.05820559
## Bulgaria 0.1938536 0.06528185 0.10542292 0.07567729 0.12083319
## Croatia 0.1720614 0.05737246 0.09378821 0.07033617 0.10949639
## France 0.1683058 0.04394329 0.07178449 0.06034639 0.09577193
## Georgia 0.2319092 0.09786065 0.13548354 0.11848206 0.06191042
## Greece 0.1869737 0.05673457 0.09397200 0.07171085 0.10485926
## Italy 0.1887996 0.05219771 0.09029722 0.07023434 0.09974544
## Malta 0.1600422 0.03919840 0.07212531 0.04526383 0.08771344
## Portugal 0.1654401 0.03612759 0.06579523 0.05569789 0.09057183
## Romania 0.2016821 0.05977806 0.10718238 0.09184709 0.12625042
## Russia 0.2259084 0.09144282 0.12934461 0.11147059 0.05832179
## Serbia NA 0.15188779 0.21540658 0.18401508 0.21523980
## Slovenia NA NA 0.06515292 0.05208143 0.08333507
## Spain NA NA NA 0.07099299 0.12195514
## Turkey NA NA NA NA 0.10371389
## Ukraine NA NA NA NA NA
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## Albania : 17 Albania : 17 Min. :0.02204
## Armenia : 17 Armenia : 17 1st Qu.:0.06509
## Bulgaria: 17 Bulgaria: 17 Median :0.09043
## Croatia : 17 Croatia : 17 Mean :0.09624
## France : 17 France : 17 3rd Qu.:0.11352
## Georgia : 17 Georgia : 17 Max. :0.23191
## (Other) :187 (Other) :187 NA's :153
You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f2 <- ggplot(pairfst.long2, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 2) +
theme(
axis.text.x = element_text(angle = 90, hjust = 0),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 1)
)
pairfst.f2
ggsave(
filename = here("output", "europe", "fst", "r_01", "fst_matrix_europe_r01_LD2_by_country.pdf"),
pairfst.f2,
width = 6,
height = 5,
units = "in"
)
Remove NAs and rename columns
# remove NAs
fst2 <-
pairfst.long |>
drop_na()
# rename columns
fst2 <-
fst2 |>
dplyr::rename(pop1 = 1,
pop2 = 2,
fst = 3)
# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
dplyr::select(pop = pop2, fst)
# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)
# Calculate the mean fst for each population
mean_fst <- df_combined |>
group_by(pop) |>
summarise(mean_fst = mean(fst))
print(mean_fst)
## # A tibble: 40 × 2
## pop mean_fst
## <fct> <dbl>
## 1 SOC 0.108
## 2 SEV 0.130
## 3 GES 0.114
## 4 KER 0.0943
## 5 KRA 0.0848
## 6 TIK 0.129
## 7 RAR 0.116
## 8 TRE 0.0636
## 9 ALU 0.0964
## 10 STS 0.0787
## # ℹ 30 more rows
Merge
fst3 <-
sampling_loc |>
left_join(
mean_fst,
by = c("Abbreviation" = "pop")
) |>
drop_na() |>
dplyr::select(
-Region
)
# check output
head(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## Subregion order mean_fst
## 1 West Europe 1 0.07499194
## 2 West Europe 2 0.07874931
## 3 West Europe 3 0.07119761
## 4 West Europe 5 0.09003042
## 5 West Europe 6 0.12147228
## 6 West Europe 7 0.09952012
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Subregion) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Subregion")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order mean_fst mean_fst_by_region
## 1 West Europe 1 0.07499194 0.1
## 2 West Europe 2 0.07874931 0.1
## 3 West Europe 3 0.07119761 0.1
## 4 West Europe 5 0.09003042 0.1
## 5 West Europe 6 0.12147228 0.1
## 6 West Europe 7 0.09952012 0.1
## 7 West Europe 8 0.11429940 0.1
## 8 West Europe 9 0.08075756 0.1
## 9 West Europe 10 0.09191574 0.1
## 10 West Europe 12 0.14543917 0.1
## 11 West Europe 13 0.13083838 0.1
## 12 West Europe 14 0.06355925 0.1
## 13 West Europe 15 0.10359028 0.1
## 14 West Europe 16 0.17703688 0.1
## 15 West Europe 17 0.07997899 0.1
## 16 West Europe 18 0.07079655 0.1
## 17 West Europe 19 0.08944254 0.1
## 18 West Europe 20 0.07208743 0.1
## 19 West Europe 21 0.06876414 0.1
## 20 East Europe 22 0.06740086 0.1
## 21 East Europe 23 0.08722078 0.1
## 22 East Europe 24 0.09096560 0.1
## 23 East Europe 25 0.11256707 0.1
## 24 East Europe 26 0.09903932 0.1
## 25 East Europe 27 0.20214043 0.1
## 26 East Europe 28 0.08828169 0.1
## 27 East Europe 29 0.09295555 0.1
## 28 East Europe 30 0.10766099 0.1
## 29 East Europe 31 0.10181071 0.1
## 30 East Europe 32 0.09222311 0.1
## 31 East Europe 33 0.07798655 0.1
## 32 East Europe 34 0.12999533 0.1
## 33 East Europe 35 0.09637019 0.1
## 34 East Europe 36 0.09431187 0.1
## 35 East Europe 37 0.08484293 0.1
## 36 East Europe 38 0.10786279 0.1
## 37 East Europe 39 0.12901506 0.1
## 38 East Europe 40 0.11630266 0.1
## 39 East Europe 41 0.11403042 0.1
## 40 East Europe 42 0.07943892 0.1
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order mean_fst mean_fst_by_region mean_fst_by_country
## 1 West Europe 1 0.07499194 0.1 0.08
## 2 West Europe 2 0.07874931 0.1 0.08
## 3 West Europe 3 0.07119761 0.1 0.07
## 4 West Europe 5 0.09003042 0.1 0.10
## 5 West Europe 6 0.12147228 0.1 0.10
## 6 West Europe 7 0.09952012 0.1 0.10
## 7 West Europe 8 0.11429940 0.1 0.10
## 8 West Europe 9 0.08075756 0.1 0.10
## 9 West Europe 10 0.09191574 0.1 0.10
## 10 West Europe 12 0.14543917 0.1 0.10
## 11 West Europe 13 0.13083838 0.1 0.10
## 12 West Europe 14 0.06355925 0.1 0.10
## 13 West Europe 15 0.10359028 0.1 0.10
## 14 West Europe 16 0.17703688 0.1 0.10
## 15 West Europe 17 0.07997899 0.1 0.10
## 16 West Europe 18 0.07079655 0.1 0.10
## 17 West Europe 19 0.08944254 0.1 0.10
## 18 West Europe 20 0.07208743 0.1 0.10
## 19 West Europe 21 0.06876414 0.1 0.07
## 20 East Europe 22 0.06740086 0.1 0.07
## 21 East Europe 23 0.08722078 0.1 0.09
## 22 East Europe 24 0.09096560 0.1 0.10
## 23 East Europe 25 0.11256707 0.1 0.10
## 24 East Europe 26 0.09903932 0.1 0.10
## 25 East Europe 27 0.20214043 0.1 0.20
## 26 East Europe 28 0.08828169 0.1 0.09
## 27 East Europe 29 0.09295555 0.1 0.09
## 28 East Europe 30 0.10766099 0.1 0.11
## 29 East Europe 31 0.10181071 0.1 0.10
## 30 East Europe 32 0.09222311 0.1 0.09
## 31 East Europe 33 0.07798655 0.1 0.09
## 32 East Europe 34 0.12999533 0.1 0.11
## 33 East Europe 35 0.09637019 0.1 0.11
## 34 East Europe 36 0.09431187 0.1 0.11
## 35 East Europe 37 0.08484293 0.1 0.11
## 36 East Europe 38 0.10786279 0.1 0.11
## 37 East Europe 39 0.12901506 0.1 0.11
## 38 East Europe 40 0.11630266 0.1 0.11
## 39 East Europe 41 0.11403042 0.1 0.11
## 40 East Europe 42 0.07943892 0.1 0.08
Mean by latitude
# Add a new column to indicate whether the latitude is above or below 30N
fst3 <- fst3 |>
mutate(Latitude_group = ifelse(Latitude >= 40, "Above 40N", "Below 40N"))
# Summarize the data by Latitude_group and calculate the mean_fst
summary_by_latitude <- fst3 |>
group_by(Latitude_group) |>
summarize(mean_fst_by_latitude = mean(mean_fst, na.rm = TRUE)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_latitude column to the fst3 tibble
fst3 <- fst3 |>
left_join(summary_by_latitude, by = "Latitude_group")
# Rename columns
fst3 <- fst3 |>
dplyr::rename(
City = Pop_City)
# Print the modified fst3 tibble
print(fst3)
## City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order mean_fst mean_fst_by_region mean_fst_by_country
## 1 West Europe 1 0.07499194 0.1 0.08
## 2 West Europe 2 0.07874931 0.1 0.08
## 3 West Europe 3 0.07119761 0.1 0.07
## 4 West Europe 5 0.09003042 0.1 0.10
## 5 West Europe 6 0.12147228 0.1 0.10
## 6 West Europe 7 0.09952012 0.1 0.10
## 7 West Europe 8 0.11429940 0.1 0.10
## 8 West Europe 9 0.08075756 0.1 0.10
## 9 West Europe 10 0.09191574 0.1 0.10
## 10 West Europe 12 0.14543917 0.1 0.10
## 11 West Europe 13 0.13083838 0.1 0.10
## 12 West Europe 14 0.06355925 0.1 0.10
## 13 West Europe 15 0.10359028 0.1 0.10
## 14 West Europe 16 0.17703688 0.1 0.10
## 15 West Europe 17 0.07997899 0.1 0.10
## 16 West Europe 18 0.07079655 0.1 0.10
## 17 West Europe 19 0.08944254 0.1 0.10
## 18 West Europe 20 0.07208743 0.1 0.10
## 19 West Europe 21 0.06876414 0.1 0.07
## 20 East Europe 22 0.06740086 0.1 0.07
## 21 East Europe 23 0.08722078 0.1 0.09
## 22 East Europe 24 0.09096560 0.1 0.10
## 23 East Europe 25 0.11256707 0.1 0.10
## 24 East Europe 26 0.09903932 0.1 0.10
## 25 East Europe 27 0.20214043 0.1 0.20
## 26 East Europe 28 0.08828169 0.1 0.09
## 27 East Europe 29 0.09295555 0.1 0.09
## 28 East Europe 30 0.10766099 0.1 0.11
## 29 East Europe 31 0.10181071 0.1 0.10
## 30 East Europe 32 0.09222311 0.1 0.09
## 31 East Europe 33 0.07798655 0.1 0.09
## 32 East Europe 34 0.12999533 0.1 0.11
## 33 East Europe 35 0.09637019 0.1 0.11
## 34 East Europe 36 0.09431187 0.1 0.11
## 35 East Europe 37 0.08484293 0.1 0.11
## 36 East Europe 38 0.10786279 0.1 0.11
## 37 East Europe 39 0.12901506 0.1 0.11
## 38 East Europe 40 0.11630266 0.1 0.11
## 39 East Europe 41 0.11403042 0.1 0.11
## 40 East Europe 42 0.07943892 0.1 0.08
## Latitude_group mean_fst_by_latitude
## 1 Above 40N 0.10236914
## 2 Above 40N 0.10236914
## 3 Above 40N 0.10236914
## 4 Below 40N 0.09149416
## 5 Below 40N 0.09149416
## 6 Below 40N 0.09149416
## 7 Above 40N 0.10236914
## 8 Below 40N 0.09149416
## 9 Above 40N 0.10236914
## 10 Above 40N 0.10236914
## 11 Above 40N 0.10236914
## 12 Above 40N 0.10236914
## 13 Above 40N 0.10236914
## 14 Above 40N 0.10236914
## 15 Above 40N 0.10236914
## 16 Above 40N 0.10236914
## 17 Below 40N 0.09149416
## 18 Above 40N 0.10236914
## 19 Below 40N 0.09149416
## 20 Above 40N 0.10236914
## 21 Above 40N 0.10236914
## 22 Above 40N 0.10236914
## 23 Above 40N 0.10236914
## 24 Above 40N 0.10236914
## 25 Above 40N 0.10236914
## 26 Below 40N 0.09149416
## 27 Below 40N 0.09149416
## 28 Above 40N 0.10236914
## 29 Above 40N 0.10236914
## 30 Below 40N 0.09149416
## 31 Above 40N 0.10236914
## 32 Above 40N 0.10236914
## 33 Above 40N 0.10236914
## 34 Above 40N 0.10236914
## 35 Above 40N 0.10236914
## 36 Above 40N 0.10236914
## 37 Above 40N 0.10236914
## 38 Above 40N 0.10236914
## 39 Above 40N 0.10236914
## 40 Above 40N 0.10236914
Mean By continent
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Continent) |>
summarize(mean_fst_by_continent = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Continent")
# Print the modified fst3 tibble
print(fst3)
## City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Subregion order mean_fst mean_fst_by_region mean_fst_by_country
## 1 West Europe 1 0.07499194 0.1 0.08
## 2 West Europe 2 0.07874931 0.1 0.08
## 3 West Europe 3 0.07119761 0.1 0.07
## 4 West Europe 5 0.09003042 0.1 0.10
## 5 West Europe 6 0.12147228 0.1 0.10
## 6 West Europe 7 0.09952012 0.1 0.10
## 7 West Europe 8 0.11429940 0.1 0.10
## 8 West Europe 9 0.08075756 0.1 0.10
## 9 West Europe 10 0.09191574 0.1 0.10
## 10 West Europe 12 0.14543917 0.1 0.10
## 11 West Europe 13 0.13083838 0.1 0.10
## 12 West Europe 14 0.06355925 0.1 0.10
## 13 West Europe 15 0.10359028 0.1 0.10
## 14 West Europe 16 0.17703688 0.1 0.10
## 15 West Europe 17 0.07997899 0.1 0.10
## 16 West Europe 18 0.07079655 0.1 0.10
## 17 West Europe 19 0.08944254 0.1 0.10
## 18 West Europe 20 0.07208743 0.1 0.10
## 19 West Europe 21 0.06876414 0.1 0.07
## 20 East Europe 22 0.06740086 0.1 0.07
## 21 East Europe 23 0.08722078 0.1 0.09
## 22 East Europe 24 0.09096560 0.1 0.10
## 23 East Europe 25 0.11256707 0.1 0.10
## 24 East Europe 26 0.09903932 0.1 0.10
## 25 East Europe 27 0.20214043 0.1 0.20
## 26 East Europe 28 0.08828169 0.1 0.09
## 27 East Europe 29 0.09295555 0.1 0.09
## 28 East Europe 30 0.10766099 0.1 0.11
## 29 East Europe 31 0.10181071 0.1 0.10
## 30 East Europe 32 0.09222311 0.1 0.09
## 31 East Europe 33 0.07798655 0.1 0.09
## 32 East Europe 34 0.12999533 0.1 0.11
## 33 East Europe 35 0.09637019 0.1 0.11
## 34 East Europe 36 0.09431187 0.1 0.11
## 35 East Europe 37 0.08484293 0.1 0.11
## 36 East Europe 38 0.10786279 0.1 0.11
## 37 East Europe 39 0.12901506 0.1 0.11
## 38 East Europe 40 0.11630266 0.1 0.11
## 39 East Europe 41 0.11403042 0.1 0.11
## 40 East Europe 42 0.07943892 0.1 0.08
## Latitude_group mean_fst_by_latitude mean_fst_by_continent
## 1 Above 40N 0.10236914 0.1
## 2 Above 40N 0.10236914 0.1
## 3 Above 40N 0.10236914 0.1
## 4 Below 40N 0.09149416 0.1
## 5 Below 40N 0.09149416 0.1
## 6 Below 40N 0.09149416 0.1
## 7 Above 40N 0.10236914 0.1
## 8 Below 40N 0.09149416 0.1
## 9 Above 40N 0.10236914 0.1
## 10 Above 40N 0.10236914 0.1
## 11 Above 40N 0.10236914 0.1
## 12 Above 40N 0.10236914 0.1
## 13 Above 40N 0.10236914 0.1
## 14 Above 40N 0.10236914 0.1
## 15 Above 40N 0.10236914 0.1
## 16 Above 40N 0.10236914 0.1
## 17 Below 40N 0.09149416 0.1
## 18 Above 40N 0.10236914 0.1
## 19 Below 40N 0.09149416 0.1
## 20 Above 40N 0.10236914 0.1
## 21 Above 40N 0.10236914 0.1
## 22 Above 40N 0.10236914 0.1
## 23 Above 40N 0.10236914 0.1
## 24 Above 40N 0.10236914 0.1
## 25 Above 40N 0.10236914 0.1
## 26 Below 40N 0.09149416 0.1
## 27 Below 40N 0.09149416 0.1
## 28 Above 40N 0.10236914 0.1
## 29 Above 40N 0.10236914 0.1
## 30 Below 40N 0.09149416 0.1
## 31 Above 40N 0.10236914 0.1
## 32 Above 40N 0.10236914 0.1
## 33 Above 40N 0.10236914 0.1
## 34 Above 40N 0.10236914 0.1
## 35 Above 40N 0.10236914 0.1
## 36 Above 40N 0.10236914 0.1
## 37 Above 40N 0.10236914 0.1
## 38 Above 40N 0.10236914 0.1
## 39 Above 40N 0.10236914 0.1
## 40 Above 40N 0.10236914 0.1
fst4 <- fst3 |>
dplyr::select(
Latitude_group, mean_fst_by_latitude, Subregion, mean_fst_by_region, Country, mean_fst_by_country, City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Latitude_group, Subregion, Country, City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## Latitude_group mean_fst_by_latitude Subregion mean_fst_by_region Country
## 1 Above 40N 0.1 East Europe 0.1 Albania
## 2 Above 40N 0.1 East Europe 0.1 Albania
## 3 Above 40N 0.1 East Europe 0.1 Albania
## 4 Above 40N 0.1 East Europe 0.1 Armenia
## 5 Above 40N 0.1 East Europe 0.1 Bulgaria
## 6 Above 40N 0.1 East Europe 0.1 Croatia
## mean_fst_by_country City Abbreviation mean_fst
## 1 0.10 Durres ALD 0.11
## 2 0.10 Tirana TIR 0.10
## 3 0.10 Vlore ALV 0.09
## 4 0.08 Ijevan ARM 0.08
## 5 0.10 Lom BUL 0.10
## 6 0.09 Dubrovnik CRO 0.09
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for Europe using LD2 SNPs.",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Latitude_group | mean_fst_by_latitude | Subregion | mean_fst_by_region | Country | mean_fst_by_country | City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|---|---|
Above 40N | 0.10 | East Europe | 0.1 | Albania | 0.10 | Durres | ALD | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Albania | 0.10 | Tirana | TIR | 0.10 |
Above 40N | 0.10 | East Europe | 0.1 | Albania | 0.10 | Vlore | ALV | 0.09 |
Above 40N | 0.10 | East Europe | 0.1 | Armenia | 0.08 | Ijevan | ARM | 0.08 |
Above 40N | 0.10 | East Europe | 0.1 | Bulgaria | 0.10 | Lom | BUL | 0.10 |
Above 40N | 0.10 | East Europe | 0.1 | Croatia | 0.09 | Dubrovnik | CRO | 0.09 |
Above 40N | 0.10 | East Europe | 0.1 | Georgia | 0.11 | Sakhumi, Abkhazia | GES | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Romania | 0.11 | Satu Mare | ROS | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Armavir | RAR | 0.12 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Krasnodar | KRA | 0.08 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Sochi | SOC | 0.11 |
Above 40N | 0.10 | East Europe | 0.1 | Russia | 0.11 | Tikhoretsk | TIK | 0.13 |
Above 40N | 0.10 | East Europe | 0.1 | Serbia | 0.20 | Novi Sad | SER | 0.20 |
Above 40N | 0.10 | East Europe | 0.1 | Slovenia | 0.07 | Ajdovscina | SLO | 0.07 |
Above 40N | 0.10 | East Europe | 0.1 | Turkey | 0.09 | Hopa | TUH | 0.08 |
Above 40N | 0.10 | East Europe | 0.1 | Ukraine | 0.11 | Alushta | ALU | 0.10 |
Above 40N | 0.10 | East Europe | 0.1 | Ukraine | 0.11 | Kerch, Crimea | KER | 0.09 |
Above 40N | 0.10 | East Europe | 0.1 | Ukraine | 0.11 | Sevastopol, Crimea | SEV | 0.13 |
Above 40N | 0.10 | West Europe | 0.1 | France | 0.08 | Saint-Martin-d'Heres | FRS | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | France | 0.08 | Strasbourg | STS | 0.08 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Bologna | ITB | 0.10 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Brescia | BRE | 0.15 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Cesena | CES | 0.18 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Desenzano | DES | 0.13 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Imperia | IMP | 0.09 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Puglia | ITP | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Rome (Sapienza) | ROM | 0.08 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Rome (Trappola) | ITR | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | Italy | 0.10 | Trentino | TRE | 0.06 |
Above 40N | 0.10 | West Europe | 0.1 | Portugal | 0.07 | Penafiel | POP | 0.07 |
Above 40N | 0.10 | West Europe | 0.1 | Spain | 0.10 | Barcelona | BAR | 0.11 |
Below 40N | 0.09 | East Europe | 0.1 | Greece | 0.09 | Athens | GRA | 0.09 |
Below 40N | 0.09 | East Europe | 0.1 | Greece | 0.09 | Chania | GRC | 0.09 |
Below 40N | 0.09 | East Europe | 0.1 | Turkey | 0.09 | Aliaga | TUA | 0.09 |
Below 40N | 0.09 | West Europe | 0.1 | Italy | 0.10 | Sicilia | SIC | 0.09 |
Below 40N | 0.09 | West Europe | 0.1 | Malta | 0.07 | Luqa | MAL | 0.07 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | Badajoz | SPB | 0.09 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | Catarroja | SPC | 0.10 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | Magaluf | SPM | 0.08 |
Below 40N | 0.09 | West Europe | 0.1 | Spain | 0.10 | San Roque | SPS | 0.12 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here(
"output",
"europe",
"fst",
"r_01",
"fst_Europe_r01_LD2_SNPS.docx"
)
# Save the Word document
print(doc, target = output_path)
To make scatter plot
# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
dplyr::group_by(Country) |>
dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))
# save the data
saveRDS(aggregated_data, here(
"output", "europe", "fst", "r_01", "LD2_country_europe_r01.rds"
))
# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]
# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)
# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)
# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)
ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
geom_point(aes(color = Country), size = 3) +
geom_line(aes(y = fitted_values), color = "blue") + # Fitted line
labs(
title = "Mean Fst by Country",
x = "Ordered Countries",
y = "Mean Fst Value"
) +
scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "none")
Save it
ggsave(
filename = here("output", "europe", "fst", "r_01", "mean_fst_by_country_europe_r01_LD2.pdf"),
width = 10,
height = 10,
units = "in"
)
Estimate distances
# Grab the population names from the matrix aa
populations_with_fst <- colnames(aa)
# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)
# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation
# Calculate the distances
for (i in 1:n) {
for (j in 1:n) {
if (i != j) {
coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
}
}
}
# Print the distance matrix
head(distance_matrix)
## FRS STS POP SPB SPS SPC BAR
## FRS 0.0000 412.1522 1225.4383 1263.7518 1371.4448 817.6702 511.8716
## STS 412.1522 0.0000 1509.1745 1601.1573 1750.4874 1213.5639 915.9614
## POP 1225.4383 1509.1745 0.0000 282.8409 614.5101 701.9619 878.2910
## SPB 1263.7518 1601.1573 282.8409 0.0000 331.7685 571.0509 827.0716
## SPS 1371.4448 1750.4874 614.5101 331.7685 0.0000 566.5153 874.4050
## SPC 817.6702 1213.5639 701.9619 571.0509 566.5153 0.0000 310.0074
## SPM IMP BRE DES TRE ITB CES
## FRS 683.8845 228.2822 349.1505 375.3876 427.8791 448.0871 524.5893
## STS 1095.6072 527.9620 389.6504 409.5459 380.8019 536.0256 604.0707
## POP 939.4811 1370.3898 1572.0008 1596.9006 1653.2807 1644.8695 1711.3013
## SPB 822.8201 1367.0861 1594.7230 1616.9360 1682.2286 1643.6694 1701.8380
## SPS 787.3372 1423.5497 1670.1969 1688.5822 1761.4648 1690.3169 1736.8728
## SPC 251.7726 857.0395 1104.4760 1122.4598 1195.9602 1124.6677 1173.2270
## ROM ITR SIC ITP MAL SLO CRO ALV
## FRS 654.1062 654.1062 1119.260 1004.474 1269.904 628.7258 1037.845 1234.162
## STS 834.5057 834.5057 1314.450 1098.126 1522.503 546.2343 1052.803 1296.908
## POP 1734.4434 1734.4434 2064.584 2102.566 2066.730 1851.8481 2197.100 2334.720
## SPB 1683.0686 1683.0686 1957.316 2040.301 1923.429 1869.2123 2157.626 2266.196
## SPS 1669.1413 1669.1413 1865.247 2002.327 1785.308 1929.6259 2144.409 2215.487
## SPC 1124.2692 1124.2692 1387.383 1474.434 1368.139 1363.2153 1600.981 1697.872
## ALD TIR SER GRA GRC ROS BUL TUA
## FRS 1192.210 1216.107 1100.2591 1709.174 1877.379 1341.427 1392.500 1884.818
## STS 1230.239 1248.235 988.6774 1763.684 1974.712 1124.852 1304.105 1885.544
## POP 2319.742 2346.521 2315.5228 2777.366 2876.039 2566.256 2591.385 3000.557
## SPB 2262.516 2289.859 2313.3538 2686.566 2760.206 2594.045 2569.099 2924.845
## SPS 2225.649 2253.121 2340.8167 2601.355 2642.916 2651.131 2568.416 2854.982
## SPC 1697.515 1724.957 1780.6744 2115.539 2193.176 2085.530 2019.617 2354.477
## TUH SEV ALU KER KRA SOC TIK RAR
## FRS 2897.030 2179.621 2245.512 2390.859 2596.424 2688.509 2660.271 2758.005
## STS 2745.357 2012.146 2071.666 2197.341 2400.939 2511.584 2447.357 2555.484
## POP 4103.736 3397.015 3464.308 3613.507 3818.891 3906.072 3885.080 3981.365
## SPB 4076.402 3387.944 3457.259 3614.231 3818.203 3894.554 3893.085 3982.756
## SPS 4053.790 3392.537 3463.561 3628.220 3828.858 3891.912 3913.323 3994.595
## SPC 3518.971 2842.701 2913.061 3074.730 3276.925 3346.038 3357.781 3442.342
## GES ARM
## FRS 2793.592 3207.499
## STS 2619.862 3045.177
## POP 4009.674 4417.211
## SPB 3995.220 4391.989
## SPS 3988.376 4369.793
## SPC 3444.704 3834.995
Compare distance and FST
# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]
# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0
# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))
data <- data |>
dplyr::arrange(
Distance
)
head(data)
## Distance FST row_index col_index
## 1 0 0 FRS FRS
## 2 0 0 STS STS
## 3 0 0 POP POP
## 4 0 0 SPB SPB
## 5 0 0 SPS SPS
## 6 0 0 SPC SPC
Fit linear regression
# Fit linear model
lm_model <- lm(FST ~ Distance, data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = Distance, y = FST)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
annotate("text", x = max(data$Distance) * 0.85, y = max(data$FST) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - European populations",
x = "Distance (Km)",
y = "FST") +
scale_x_continuous(labels = scales::comma) +
theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
Select countries with at least 3 sampling localities in Europe & Asia
countries_with_3_pops <- filtered_sampling_loc %>%
group_by(Country) %>%
filter(n() >= 3) %>%
pull(Country) %>%
unique()
countries_with_3_pops
## [1] "Spain" "Italy" "Albania" "Ukraine" "Russia"
Do test for each country
results <- list()
for (country in countries_with_3_pops) {
# Extract abbreviations for the country
abbreviations <- filtered_sampling_loc %>%
filter(Country == country) %>%
pull(Abbreviation)
# Subset the data
subset_data <- data %>%
filter(row_index %in% abbreviations & col_index %in% abbreviations)
# Perform linear regression
lm_model <- lm(FST ~ Distance, data = subset_data)
results[[country]] <- list(
equation = sprintf("y = %.5fx + %.3f", coef(lm_model)[2], coef(lm_model)[1]),
r2 = sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
)
}
results
## $Spain
## $Spain$equation
## [1] "y = 0.00003x + 0.038"
##
## $Spain$r2
## [1] "R^2 = 0.05"
##
##
## $Italy
## $Italy$equation
## [1] "y = 0.00004x + 0.088"
##
## $Italy$r2
## [1] "R^2 = 0.04"
##
##
## $Albania
## $Albania$equation
## [1] "y = 0.00067x + 0.015"
##
## $Albania$r2
## [1] "R^2 = 0.61"
##
##
## $Ukraine
## $Ukraine$equation
## [1] "y = 0.00024x + 0.065"
##
## $Ukraine$r2
## [1] "R^2 = 0.09"
##
##
## $Russia
## $Russia$equation
## [1] "y = 0.00041x + 0.011"
##
## $Russia$r2
## [1] "R^2 = 0.40"
Calculate linear regression for each country
regression_results <- data_filtered %>%
group_by(Country1) %>%
do(model = lm(FST ~ Distance, data = .)) %>%
rowwise() %>%
mutate(equation = sprintf("y = %.3fx + %.3f", coef(model)[2], coef(model)[1]), r2 = sprintf("R^2 = %.2f", summary(model)$r.squared))
Plot it
ggplot(data_filtered, aes(x = Distance, y = FST)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ Country1, scales = "free", ncol = 2) +
geom_text(data = regression_results, aes(label = paste(equation, r2, sep = "\n"), x = Inf, y = Inf),
vjust = 2, hjust = 2, size = 3.5, inherit.aes = FALSE) +
labs(title = "FST vs Distance by Country",
x = "Distance",
y = "FST") +
scale_x_continuous(labels = scales::comma) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("output", "europe", "fst", "r_01", "fst_by_distance_countries_Europe_r01_LD2.pdf"),
width = 6,
height = 8,
units = "in"
)
We can merge the FST and distance matrices
# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)
# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]
# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names
# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
# Upper triangular - Fst values with two decimal places
merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
} else if (i > j) {
# Lower triangular - Distance values with zero decimal places
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Now the merged_matrix should be formatted as you need
print(merged_matrix)
## ALD ALU ALV ARM BAR BRE BUL CES CRO DES
## ALD NA "0.12" "0.02" "0.11" "0.12" "0.17" "0.11" "0.20" "0.05" "0.16"
## ALU "1268" NA "0.10" "0.06" "0.11" "0.16" "0.10" "0.19" "0.09" "0.15"
## ALV "93" "1307" NA "0.09" "0.10" "0.16" "0.09" "0.18" "0.03" "0.14"
## ARM "2144" "974" "2159" NA "0.11" "0.13" "0.09" "0.16" "0.08" "0.11"
## BAR "1446" "2631" "1458" "3566" NA "0.18" "0.11" "0.21" "0.09" "0.17"
## BRE "887" "1896" "942" "2859" "796" NA "0.15" "0.16" "0.15" "0.09"
## BUL "414" "895" "483" "1826" "1742" "1048" NA "0.19" "0.09" "0.14"
## CES "673" "1757" "724" "2706" "878" "223" "881" NA "0.18" "0.14"
## CRO "180" "1321" "260" "2235" "1333" "719" "428" "514" NA "0.13"
## DES "859" "1870" "915" "2832" "815" "28" "1021" "198" "691" NA
## FRS "1192" "2246" "1234" "3207" "512" "349" "1393" "525" "1038" "375"
## GES "1770" "550" "1795" "429" "3168" "2444" "1426" "2300" "1845" "2418"
## GRA "534" "1151" "476" "1848" "1900" "1417" "656" "1200" "711" "1390"
## GRC "754" "1348" "679" "1937" "2007" "1611" "925" "1389" "934" "1585"
## IMP "984" "2097" "1020" "3047" "552" "254" "1221" "341" "840" "268"
## ITB "752" "1821" "802" "2773" "824" "148" "950" "79" "591" "126"
## ITP "224" "1482" "235" "2367" "1227" "728" "604" "506" "201" "702"
## ITR "585" "1794" "605" "2711" "862" "445" "899" "252" "477" "427"
## KER "1443" "179" "1484" "862" "2789" "2042" "1062" "1913" "1490" "2017"
## KRA "1633" "366" "1670" "674" "2992" "2248" "1261" "2117" "1687" "2222"
## MAL "746" "1948" "674" "2719" "1233" "1135" "1157" "943" "817" "1119"
## POP "2320" "3464" "2335" "4417" "878" "1572" "2591" "1711" "2197" "1597"
## RAR "1798" "532" "1833" "560" "3157" "2410" "1427" "2281" "1853" "2385"
## ROM "585" "1794" "605" "2711" "862" "445" "899" "252" "477" "427"
## ROS "771" "950" "859" "1923" "1783" "1000" "445" "917" "683" "977"
## SER "442" "1149" "534" "2107" "1489" "752" "316" "612" "322" "725"
## SEV "1196" "72" "1235" "1034" "2561" "1830" "824" "1689" "1250" "1804"
## SIC "480" "1724" "420" "2545" "1195" "925" "894" "715" "537" "905"
## SLO "689" "1617" "761" "2581" "1059" "280" "782" "227" "509" "255"
## SOC "1678" "443" "1706" "538" "3068" "2339" "1327" "2197" "1748" "2313"
## SPB "2263" "3457" "2266" "4392" "827" "1595" "2569" "1702" "2158" "1617"
## SPC "1698" "2913" "1698" "3835" "310" "1104" "2020" "1173" "1601" "1122"
## SPM "1450" "2677" "1448" "3591" "211" "919" "1782" "956" "1360" "933"
## SPS "2226" "3464" "2215" "4370" "874" "1670" "2568" "1737" "2144" "1689"
## STS "1230" "2072" "1297" "3045" "916" "390" "1304" "604" "1053" "410"
## TIK "1733" "467" "1774" "687" "3069" "2313" "1350" "2191" "1778" "2288"
## TIR "27" "1242" "99" "2117" "1473" "908" "393" "696" "196" "881"
## TRE "858" "1821" "920" "2787" "887" "92" "986" "230" "684" "80"
## TUA "694" "904" "667" "1571" "2123" "1569" "641" "1363" "851" "1542"
## TUH "1828" "679" "1844" "316" "3251" "2549" "1512" "2393" "1919" "2522"
## FRS GES GRA GRC IMP ITB ITP ITR KER KRA
## ALD "0.09" "0.15" "0.08" "0.08" "0.11" "0.13" "0.08" "0.08" "0.12" "0.11"
## ALU "0.08" "0.09" "0.08" "0.09" "0.09" "0.11" "0.08" "0.07" "0.06" "0.05"
## ALV "0.07" "0.13" "0.06" "0.05" "0.08" "0.10" "0.06" "0.06" "0.10" "0.10"
## ARM "0.06" "0.06" "0.08" "0.08" "0.08" "0.09" "0.07" "0.07" "0.04" "0.02"
## BAR "0.09" "0.14" "0.09" "0.10" "0.11" "0.13" "0.08" "0.07" "0.12" "0.11"
## BRE "0.12" "0.18" "0.15" "0.15" "0.13" "0.12" "0.12" "0.14" "0.15" "0.15"
## BUL "0.07" "0.13" "0.09" "0.09" "0.09" "0.10" "0.06" "0.05" "0.11" "0.10"
## CES "0.15" "0.21" "0.18" "0.19" "0.18" "0.17" "0.15" "0.17" "0.18" "0.18"
## CRO "0.06" "0.12" "0.06" "0.05" "0.08" "0.09" "0.05" "0.05" "0.10" "0.09"
## DES "0.11" "0.17" "0.14" "0.14" "0.13" "0.11" "0.11" "0.12" "0.14" "0.14"
## FRS NA "0.11" "0.07" "0.07" "0.05" "0.06" "0.04" "0.04" "0.08" "0.08"
## GES "2794" NA "0.11" "0.12" "0.13" "0.14" "0.11" "0.10" "0.05" "0.02"
## GRA "1709" "1540" NA "0.02" "0.08" "0.10" "0.05" "0.05" "0.09" "0.08"
## GRC "1877" "1675" "270" NA "0.09" "0.10" "0.06" "0.06" "0.10" "0.09"
## IMP "228" "2640" "1492" "1653" NA "0.08" "0.05" "0.05" "0.09" "0.09"
## ITB "448" "2365" "1279" "1467" "277" NA "0.07" "0.07" "0.11" "0.11"
## ITP "1004" "1991" "705" "884" "787" "583" NA "0.02" "0.08" "0.08"
## ITR "654" "2321" "1070" "1224" "428" "302" "371" NA "0.08" "0.07"
## KER "2391" "434" "1327" "1517" "2250" "1973" "1655" "1960" NA "0.03"
## KRA "2596" "257" "1478" "1648" "2455" "2178" "1848" "2160" "206" NA
## MAL "1270" "2395" "873" "862" "1049" "996" "621" "694" "2127" "2301"
## POP "1225" "4010" "2777" "2876" "1370" "1645" "2103" "1734" "3614" "3819"
## RAR "2758" "210" "1629" "1789" "2619" "2342" "2014" "2326" "369" "166"
## ROM "654" "2321" "1070" "1224" "428" "302" "371" "0" "1960" "2160"
## ROS "1341" "1496" "1100" "1370" "1232" "961" "884" "1047" "1073" "1276"
## SER "1100" "1695" "884" "1141" "949" "672" "520" "697" "1301" "1506"
## SEV "2180" "614" "1086" "1288" "2029" "1753" "1410" "1723" "250" "438"
## SIC "1119" "2197" "736" "812" "891" "779" "340" "483" "1901" "2084"
## SLO "629" "2166" "1220" "1440" "507" "245" "585" "455" "1762" "1968"
## SOC "2689" "109" "1470" "1618" "2537" "2262" "1898" "2224" "325" "161"
## SPB "1264" "3995" "2687" "2760" "1367" "1644" "2040" "1683" "3614" "3818"
## SPC "818" "3445" "2116" "2193" "857" "1125" "1474" "1124" "3075" "3277"
## SPM "684" "3204" "1864" "1944" "665" "916" "1227" "883" "2841" "3042"
## SPS "1371" "3988" "2601" "2643" "1424" "1690" "2002" "1669" "3628" "3829"
## STS "412" "2620" "1764" "1975" "528" "536" "1098" "835" "2197" "2401"
## TIK "2660" "315" "1600" "1776" "2527" "2250" "1945" "2247" "290" "132"
## TIR "1216" "1743" "515" "742" "1009" "775" "251" "612" "1417" "1607"
## TRE "428" "2370" "1391" "1596" "346" "176" "717" "476" "1964" "2170"
## TUA "1885" "1265" "277" "445" "1678" "1441" "901" "1271" "1075" "1214"
## TUH "2897" "194" "1544" "1650" "2734" "2461" "2051" "2395" "597" "443"
## MAL POP RAR ROM ROS SER SEV SIC SLO SOC
## ALD "0.08" "0.09" "0.15" "0.10" "0.12" "0.22" "0.16" "0.11" "0.08" "0.14"
## ALU "0.07" "0.08" "0.08" "0.08" "0.11" "0.20" "0.10" "0.10" "0.07" "0.07"
## ALV "0.06" "0.07" "0.13" "0.07" "0.10" "0.18" "0.14" "0.09" "0.06" "0.12"
## ARM "0.06" "0.05" "0.05" "0.08" "0.09" "0.19" "0.07" "0.08" "0.05" "0.04"
## BAR "0.08" "0.09" "0.14" "0.08" "0.13" "0.22" "0.16" "0.10" "0.08" "0.14"
## BRE "0.12" "0.11" "0.19" "0.15" "0.14" "0.25" "0.20" "0.12" "0.10" "0.18"
## BUL "0.06" "0.07" "0.14" "0.07" "0.10" "0.19" "0.15" "0.08" "0.07" "0.13"
## CES "0.15" "0.13" "0.22" "0.19" "0.17" "0.28" "0.23" "0.16" "0.14" "0.21"
## CRO "0.06" "0.06" "0.12" "0.05" "0.10" "0.17" "0.14" "0.08" "0.06" "0.12"
## DES "0.11" "0.08" "0.18" "0.14" "0.12" "0.23" "0.19" "0.09" "0.08" "0.17"
## FRS "0.04" "0.04" "0.11" "0.04" "0.07" "0.16" "0.12" "0.06" "0.04" "0.10"
## GES "0.10" "0.11" "0.03" "0.11" "0.14" "0.23" "0.05" "0.12" "0.10" "0.01"
## GRA "0.05" "0.07" "0.12" "0.05" "0.10" "0.19" "0.13" "0.08" "0.05" "0.11"
## GRC "0.06" "0.07" "0.12" "0.06" "0.10" "0.19" "0.13" "0.09" "0.06" "0.12"
## IMP "0.04" "0.05" "0.13" "0.06" "0.09" "0.22" "0.15" "0.07" "0.05" "0.12"
## ITB "0.06" "0.05" "0.15" "0.08" "0.10" "0.22" "0.16" "0.08" "0.06" "0.13"
## ITP "0.02" "0.04" "0.11" "0.02" "0.08" "0.17" "0.12" "0.04" "0.04" "0.10"
## ITR "0.02" "0.03" "0.10" "0.01" "0.08" "0.16" "0.12" "0.05" "0.04" "0.10"
## KER "0.08" "0.07" "0.05" "0.09" "0.11" "0.20" "0.07" "0.10" "0.07" "0.05"
## KRA "0.07" "0.07" "0.01" "0.08" "0.11" "0.20" "0.03" "0.09" "0.07" "0.01"
## MAL NA "0.03" "0.11" "0.02" "0.07" "0.16" "0.12" "0.05" "0.04" "0.10"
## POP "2067" NA "0.11" "0.05" "0.07" "0.17" "0.12" "0.05" "0.04" "0.10"
## RAR "2460" "3981" NA "0.12" "0.14" "0.23" "0.05" "0.13" "0.10" "0.02"
## ROM "694" "1734" "2326" NA "0.09" "0.21" "0.13" "0.06" "0.04" "0.11"
## ROS "1498" "2566" "1432" "1047" NA "0.20" "0.16" "0.09" "0.06" "0.13"
## SER "1139" "2316" "1670" "697" "367" NA "0.25" "0.19" "0.15" "0.23"
## SEV "1878" "3397" "604" "1723" "894" "1082" NA "0.14" "0.11" "0.05"
## SIC "280" "2065" "2246" "483" "1219" "858" "1652" NA "0.05" "0.12"
## SLO "1118" "1852" "2130" "455" "726" "476" "1551" "865" NA "0.09"
## SOC "2316" "3906" "187" "2224" "1388" "1591" "509" "2113" "2060" NA
## SPB "1923" "283" "3983" "1683" "2594" "2313" "3388" "1957" "1869" "3895"
## SPC "1368" "702" "3442" "1124" "2086" "1781" "2843" "1387" "1363" "3346"
## SPM "1128" "939" "3208" "883" "1873" "1555" "2606" "1136" "1160" "3107"
## SPS "1785" "615" "3995" "1669" "2651" "2341" "3393" "1865" "1930" "3892"
## STS "1523" "1509" "2555" "835" "1125" "989" "2012" "1314" "546" "2512"
## TIK "2414" "3885" "127" "2247" "1328" "1580" "539" "2190" "2033" "253"
## TIR "764" "2347" "1772" "612" "761" "439" "1170" "502" "705" "1651"
## TRE "1170" "1653" "2331" "476" "915" "683" "1756" "945" "206" "2264"
## TUA "1148" "3001" "1361" "1271" "1057" "932" "843" "994" "1340" "1198"
## TUH "2412" "4104" "398" "2395" "1629" "1797" "734" "2233" "2272" "283"
## SPB SPC SPM SPS STS TIK TIR TRE TUA TUH
## ALD "0.12" "0.13" "0.10" "0.14" "0.10" "0.16" "0.03" "0.08" "0.10" "0.09"
## ALU "0.10" "0.11" "0.09" "0.13" "0.09" "0.09" "0.10" "0.07" "0.09" "0.08"
## ALV "0.10" "0.11" "0.08" "0.11" "0.07" "0.14" "-0.00" "0.06" "0.08" "0.07"
## ARM "0.06" "0.07" "0.06" "0.12" "0.07" "0.07" "0.09" "0.05" "0.09" "0.07"
## BAR "0.12" "0.13" "0.10" "0.11" "0.09" "0.16" "0.10" "0.08" "0.06" "0.04"
## BRE "0.10" "0.12" "0.11" "0.18" "0.12" "0.19" "0.17" "0.09" "0.16" "0.13"
## BUL "0.10" "0.10" "0.08" "0.12" "0.08" "0.15" "0.10" "0.06" "0.08" "0.07"
## CES "0.13" "0.15" "0.14" "0.22" "0.15" "0.22" "0.21" "0.11" "0.19" "0.16"
## CRO "0.09" "0.10" "0.08" "0.11" "0.06" "0.13" "0.02" "0.05" "0.08" "0.07"
## DES "0.08" "0.10" "0.09" "0.17" "0.11" "0.18" "0.15" "0.07" "0.14" "0.12"
## FRS "0.06" "0.07" "0.05" "0.10" "0.03" "0.12" "0.07" "0.03" "0.07" "0.05"
## GES "0.12" "0.13" "0.12" "0.16" "0.11" "0.05" "0.14" "0.10" "0.12" "0.11"
## GRA "0.09" "0.10" "0.08" "0.10" "0.07" "0.13" "0.06" "0.06" "0.07" "0.06"
## GRC "0.09" "0.10" "0.08" "0.11" "0.07" "0.14" "0.06" "0.06" "0.08" "0.07"
## IMP "0.07" "0.08" "0.07" "0.10" "0.05" "0.15" "0.10" "0.04" "0.08" "0.06"
## ITB "0.08" "0.09" "0.07" "0.13" "0.07" "0.16" "0.11" "0.05" "0.11" "0.08"
## ITP "0.07" "0.08" "0.05" "0.09" "0.04" "0.12" "0.06" "0.03" "0.06" "0.04"
## ITR "0.08" "0.09" "0.06" "0.07" "0.04" "0.12" "0.06" "0.04" "0.04" "0.03"
## KER "0.09" "0.10" "0.09" "0.13" "0.08" "0.07" "0.11" "0.07" "0.10" "0.08"
## KRA "0.09" "0.10" "0.08" "0.13" "0.08" "0.04" "0.10" "0.07" "0.09" "0.08"
## MAL "0.06" "0.07" "0.05" "0.07" "0.04" "0.12" "0.07" "0.03" "0.05" "0.04"
## POP "0.05" "0.05" "0.04" "0.09" "0.04" "0.12" "0.07" "0.03" "0.06" "0.05"
## RAR "0.13" "0.14" "0.12" "0.16" "0.12" "0.05" "0.14" "0.11" "0.13" "0.11"
## ROM "0.08" "0.09" "0.07" "0.09" "0.04" "0.13" "0.07" "0.03" "0.06" "0.04"
## ROS "0.09" "0.10" "0.09" "0.13" "0.08" "0.15" "0.12" "0.05" "0.10" "0.08"
## SER "0.20" "0.22" "0.21" "0.24" "0.17" "0.25" "0.22" "0.15" "0.20" "0.17"
## SEV "0.14" "0.15" "0.14" "0.17" "0.13" "0.08" "0.15" "0.12" "0.14" "0.12"
## SIC "0.08" "0.09" "0.06" "0.11" "0.06" "0.14" "0.10" "0.04" "0.08" "0.06"
## SLO "0.05" "0.06" "0.04" "0.09" "0.04" "0.11" "0.06" "0.02" "0.06" "0.04"
## SOC "0.12" "0.12" "0.11" "0.15" "0.11" "0.06" "0.13" "0.10" "0.12" "0.10"
## SPB NA "0.01" "0.01" "0.13" "0.06" "0.14" "0.10" "0.04" "0.10" "0.08"
## SPC "571" NA "0.02" "0.13" "0.07" "0.15" "0.11" "0.05" "0.10" "0.08"
## SPM "823" "252" NA "0.06" "0.05" "0.13" "0.10" "0.03" "0.08" "0.06"
## SPS "332" "567" "787" NA "0.10" "0.17" "0.13" "0.08" "0.03" "0.09"
## STS "1601" "1214" "1096" "1750" NA "0.13" "0.08" "0.03" "0.07" "0.05"
## TIK "3893" "3358" "3126" "3913" "2447" NA "0.15" "0.11" "0.14" "0.12"
## TIR "2290" "1725" "1478" "2253" "1248" "1707" NA "0.06" "0.08" "0.07"
## TRE "1682" "1196" "1011" "1761" "381" "2233" "877" NA "0.06" "0.05"
## TUA "2925" "2354" "2103" "2855" "1886" "1339" "669" "1529" NA "0.05"
## TUH "4076" "3519" "3275" "4054" "2745" "508" "1801" "2478" "1267" NA
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Franceville Gabon -1.59 13.5 Africa GAB 2015 Centr…
## 2 Antananarivo Madagascar -18.9 47.5 Africa ANT 2022 East …
## 3 Diego ville Madagascar -12.3 49.3 Africa DGV 2022 East …
## 4 Morondava Madagascar -20.3 44.3 Africa MAD 2016 East …
## 5 Vohimasy Madagascar -22.8 47.8 Africa VOH 2016… East …
## 6 Dauguet Mauritius -20.2 57.5 Africa DAU 2022 India…
## # ℹ 2 more variables: Subregion <chr>, order <int>
We can sort by distance
# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)
# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))
# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]
# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
} else if (i > j) {
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Print the merged matrix
print(merged_matrix)
## CRO ALD ITP TIR SER ALV CES ITR ROM SLO
## CRO NA "0.05" "0.05" "0.02" "0.17" "0.03" "0.18" "0.05" "0.05" "0.06"
## ALD "180" NA "0.08" "0.03" "0.22" "0.02" "0.20" "0.08" "0.10" "0.08"
## ITP "201" "224" NA "0.06" "0.17" "0.06" "0.15" "0.02" "0.02" "0.04"
## TIR "196" "27" "251" NA "0.22" "0.00" "0.21" "0.06" "0.07" "0.06"
## SER "322" "442" "520" "439" NA "0.18" "0.28" "0.16" "0.21" "0.15"
## ALV "260" "93" "235" "99" "534" NA "0.18" "0.06" "0.07" "0.06"
## CES "514" "673" "506" "696" "612" "724" NA "0.17" "0.19" "0.14"
## ITR "477" "585" "371" "612" "697" "605" "252" NA "0.01" "0.04"
## ROM "477" "585" "371" "612" "697" "605" "252" "0" NA "0.04"
## SLO "509" "689" "585" "705" "476" "761" "227" "455" "455" NA
## ITB "591" "752" "583" "775" "672" "802" "79" "302" "302" "245"
## BUL "428" "414" "604" "393" "316" "483" "881" "899" "899" "782"
## DES "691" "859" "702" "881" "725" "915" "198" "427" "427" "255"
## TRE "684" "858" "717" "877" "683" "920" "230" "476" "476" "206"
## BRE "719" "887" "728" "908" "752" "942" "223" "445" "445" "280"
## SIC "537" "480" "340" "502" "858" "420" "715" "483" "483" "865"
## ROS "683" "771" "884" "761" "367" "859" "917" "1047" "1047" "726"
## IMP "840" "984" "787" "1009" "949" "1020" "341" "428" "428" "507"
## GRA "711" "534" "705" "515" "884" "476" "1200" "1070" "1070" "1220"
## TUA "851" "694" "901" "669" "932" "667" "1363" "1271" "1271" "1340"
## FRS "1038" "1192" "1004" "1216" "1100" "1234" "525" "654" "654" "629"
## MAL "817" "746" "621" "764" "1139" "674" "943" "694" "694" "1118"
## STS "1053" "1230" "1098" "1248" "989" "1297" "604" "835" "835" "546"
## GRC "934" "754" "884" "742" "1141" "679" "1389" "1224" "1224" "1440"
## SEV "1250" "1196" "1410" "1170" "1082" "1235" "1689" "1723" "1723" "1551"
## ALU "1321" "1268" "1482" "1242" "1149" "1307" "1757" "1794" "1794" "1617"
## BAR "1333" "1446" "1227" "1473" "1489" "1458" "878" "862" "862" "1059"
## SPM "1360" "1450" "1227" "1478" "1555" "1448" "956" "883" "883" "1160"
## KER "1490" "1443" "1655" "1417" "1301" "1484" "1913" "1960" "1960" "1762"
## KRA "1687" "1633" "1848" "1607" "1506" "1670" "2117" "2160" "2160" "1968"
## SPC "1601" "1698" "1474" "1725" "1781" "1698" "1173" "1124" "1124" "1363"
## SOC "1748" "1678" "1898" "1651" "1591" "1706" "2197" "2224" "2224" "2060"
## TIK "1778" "1733" "1945" "1707" "1580" "1774" "2191" "2247" "2247" "2033"
## GES "1845" "1770" "1991" "1743" "1695" "1795" "2300" "2321" "2321" "2166"
## RAR "1853" "1798" "2014" "1772" "1670" "1833" "2281" "2326" "2326" "2130"
## TUH "1919" "1828" "2051" "1801" "1797" "1844" "2393" "2395" "2395" "2272"
## ARM "2235" "2144" "2367" "2117" "2107" "2159" "2706" "2711" "2711" "2581"
## SPB "2158" "2263" "2040" "2290" "2313" "2266" "1702" "1683" "1683" "1869"
## SPS "2144" "2226" "2002" "2253" "2341" "2215" "1737" "1669" "1669" "1930"
## POP "2197" "2320" "2103" "2347" "2316" "2335" "1711" "1734" "1734" "1852"
## ITB BUL DES TRE BRE SIC ROS IMP GRA TUA
## CRO "0.09" "0.09" "0.13" "0.05" "0.15" "0.08" "0.10" "0.08" "0.06" "0.08"
## ALD "0.13" "0.11" "0.16" "0.08" "0.17" "0.11" "0.12" "0.11" "0.08" "0.10"
## ITP "0.07" "0.06" "0.11" "0.03" "0.12" "0.04" "0.08" "0.05" "0.05" "0.06"
## TIR "0.11" "0.10" "0.15" "0.06" "0.17" "0.10" "0.12" "0.10" "0.06" "0.08"
## SER "0.22" "0.19" "0.23" "0.15" "0.25" "0.19" "0.20" "0.22" "0.19" "0.20"
## ALV "0.10" "0.09" "0.14" "0.06" "0.16" "0.09" "0.10" "0.08" "0.06" "0.08"
## CES "0.17" "0.19" "0.14" "0.11" "0.16" "0.16" "0.17" "0.18" "0.18" "0.19"
## ITR "0.07" "0.05" "0.12" "0.04" "0.14" "0.05" "0.08" "0.05" "0.05" "0.04"
## ROM "0.08" "0.07" "0.14" "0.03" "0.15" "0.06" "0.09" "0.06" "0.05" "0.06"
## SLO "0.06" "0.07" "0.08" "0.02" "0.10" "0.05" "0.06" "0.05" "0.05" "0.06"
## ITB NA "0.10" "0.11" "0.05" "0.12" "0.08" "0.10" "0.08" "0.10" "0.11"
## BUL "950" NA "0.14" "0.06" "0.15" "0.08" "0.10" "0.09" "0.09" "0.08"
## DES "126" "1021" NA "0.07" "0.09" "0.09" "0.12" "0.13" "0.14" "0.14"
## TRE "176" "986" "80" NA "0.09" "0.04" "0.05" "0.04" "0.06" "0.06"
## BRE "148" "1048" "28" "92" NA "0.12" "0.14" "0.13" "0.15" "0.16"
## SIC "779" "894" "905" "945" "925" NA "0.09" "0.07" "0.08" "0.08"
## ROS "961" "445" "977" "915" "1000" "1219" NA "0.09" "0.10" "0.10"
## IMP "277" "1221" "268" "346" "254" "891" "1232" NA "0.08" "0.08"
## GRA "1279" "656" "1390" "1391" "1417" "736" "1100" "1492" NA "0.07"
## TUA "1441" "641" "1542" "1529" "1569" "994" "1057" "1678" "277" NA
## FRS "448" "1393" "375" "428" "349" "1119" "1341" "228" "1709" "1885"
## MAL "996" "1157" "1119" "1170" "1135" "280" "1498" "1049" "873" "1148"
## STS "536" "1304" "410" "381" "390" "1314" "1125" "528" "1764" "1886"
## GRC "1467" "925" "1585" "1596" "1611" "812" "1370" "1653" "270" "445"
## SEV "1753" "824" "1804" "1756" "1830" "1652" "894" "2029" "1086" "843"
## ALU "1821" "895" "1870" "1821" "1896" "1724" "950" "2097" "1151" "904"
## BAR "824" "1742" "815" "887" "796" "1195" "1783" "552" "1900" "2123"
## SPM "916" "1782" "933" "1011" "919" "1136" "1873" "665" "1864" "2103"
## KER "1973" "1062" "2017" "1964" "2042" "1901" "1073" "2250" "1327" "1075"
## KRA "2178" "1261" "2222" "2170" "2248" "2084" "1276" "2455" "1478" "1214"
## SPC "1125" "2020" "1122" "1196" "1104" "1387" "2086" "857" "2116" "2354"
## SOC "2262" "1327" "2313" "2264" "2339" "2113" "1388" "2537" "1470" "1198"
## TIK "2250" "1350" "2288" "2233" "2313" "2190" "1328" "2527" "1600" "1339"
## GES "2365" "1426" "2418" "2370" "2444" "2197" "1496" "2640" "1540" "1265"
## RAR "2342" "1427" "2385" "2331" "2410" "2246" "1432" "2619" "1629" "1361"
## TUH "2461" "1512" "2522" "2478" "2549" "2233" "1629" "2734" "1544" "1267"
## ARM "2773" "1826" "2832" "2787" "2859" "2545" "1923" "3047" "1848" "1571"
## SPB "1644" "2569" "1617" "1682" "1595" "1957" "2594" "1367" "2687" "2925"
## SPS "1690" "2568" "1689" "1761" "1670" "1865" "2651" "1424" "2601" "2855"
## POP "1645" "2591" "1597" "1653" "1572" "2065" "2566" "1370" "2777" "3001"
## FRS MAL STS GRC SEV ALU BAR SPM KER KRA
## CRO "0.06" "0.06" "0.06" "0.05" "0.14" "0.09" "0.09" "0.08" "0.10" "0.09"
## ALD "0.09" "0.08" "0.10" "0.08" "0.16" "0.12" "0.12" "0.10" "0.12" "0.11"
## ITP "0.04" "0.02" "0.04" "0.06" "0.12" "0.08" "0.08" "0.05" "0.08" "0.08"
## TIR "0.07" "0.07" "0.08" "0.06" "0.15" "0.10" "0.10" "0.10" "0.11" "0.10"
## SER "0.16" "0.16" "0.17" "0.19" "0.25" "0.20" "0.22" "0.21" "0.20" "0.20"
## ALV "0.07" "0.06" "0.07" "0.05" "0.14" "0.10" "0.10" "0.08" "0.10" "0.10"
## CES "0.15" "0.15" "0.15" "0.19" "0.23" "0.19" "0.21" "0.14" "0.18" "0.18"
## ITR "0.04" "0.02" "0.04" "0.06" "0.12" "0.07" "0.07" "0.06" "0.08" "0.07"
## ROM "0.04" "0.02" "0.04" "0.06" "0.13" "0.08" "0.08" "0.07" "0.09" "0.08"
## SLO "0.04" "0.04" "0.04" "0.06" "0.11" "0.07" "0.08" "0.04" "0.07" "0.07"
## ITB "0.06" "0.06" "0.07" "0.10" "0.16" "0.11" "0.13" "0.07" "0.11" "0.11"
## BUL "0.07" "0.06" "0.08" "0.09" "0.15" "0.10" "0.11" "0.08" "0.11" "0.10"
## DES "0.11" "0.11" "0.11" "0.14" "0.19" "0.15" "0.17" "0.09" "0.14" "0.14"
## TRE "0.03" "0.03" "0.03" "0.06" "0.12" "0.07" "0.08" "0.03" "0.07" "0.07"
## BRE "0.12" "0.12" "0.12" "0.15" "0.20" "0.16" "0.18" "0.11" "0.15" "0.15"
## SIC "0.06" "0.05" "0.06" "0.09" "0.14" "0.10" "0.10" "0.06" "0.10" "0.09"
## ROS "0.07" "0.07" "0.08" "0.10" "0.16" "0.11" "0.13" "0.09" "0.11" "0.11"
## IMP "0.05" "0.04" "0.05" "0.09" "0.15" "0.09" "0.11" "0.07" "0.09" "0.09"
## GRA "0.07" "0.05" "0.07" "0.02" "0.13" "0.08" "0.09" "0.08" "0.09" "0.08"
## TUA "0.07" "0.05" "0.07" "0.08" "0.14" "0.09" "0.06" "0.08" "0.10" "0.09"
## FRS NA "0.04" "0.03" "0.07" "0.12" "0.08" "0.09" "0.05" "0.08" "0.08"
## MAL "1270" NA "0.04" "0.06" "0.12" "0.07" "0.08" "0.05" "0.08" "0.07"
## STS "412" "1523" NA "0.07" "0.13" "0.09" "0.09" "0.05" "0.08" "0.08"
## GRC "1877" "862" "1975" NA "0.13" "0.09" "0.10" "0.08" "0.10" "0.09"
## SEV "2180" "1878" "2012" "1288" NA "0.10" "0.16" "0.14" "0.07" "0.03"
## ALU "2246" "1948" "2072" "1348" "72" NA "0.11" "0.09" "0.06" "0.05"
## BAR "512" "1233" "916" "2007" "2561" "2631" NA "0.10" "0.12" "0.11"
## SPM "684" "1128" "1096" "1944" "2606" "2677" "211" NA "0.09" "0.08"
## KER "2391" "2127" "2197" "1517" "250" "179" "2789" "2841" NA "0.03"
## KRA "2596" "2301" "2401" "1648" "438" "366" "2992" "3042" "206" NA
## SPC "818" "1368" "1214" "2193" "2843" "2913" "310" "252" "3075" "3277"
## SOC "2689" "2316" "2512" "1618" "509" "443" "3068" "3107" "325" "161"
## TIK "2660" "2414" "2447" "1776" "539" "467" "3069" "3126" "290" "132"
## GES "2794" "2395" "2620" "1675" "614" "550" "3168" "3204" "434" "257"
## RAR "2758" "2460" "2555" "1789" "604" "532" "3157" "3208" "369" "166"
## TUH "2897" "2412" "2745" "1650" "734" "679" "3251" "3275" "597" "443"
## ARM "3207" "2719" "3045" "1937" "1034" "974" "3566" "3591" "862" "674"
## SPB "1264" "1923" "1601" "2760" "3388" "3457" "827" "823" "3614" "3818"
## SPS "1371" "1785" "1750" "2643" "3393" "3464" "874" "787" "3628" "3829"
## POP "1225" "2067" "1509" "2876" "3397" "3464" "878" "939" "3614" "3819"
## SPC SOC TIK GES RAR TUH ARM SPB SPS POP
## CRO "0.10" "0.12" "0.13" "0.12" "0.12" "0.07" "0.08" "0.09" "0.11" "0.06"
## ALD "0.13" "0.14" "0.16" "0.15" "0.15" "0.09" "0.11" "0.12" "0.14" "0.09"
## ITP "0.08" "0.10" "0.12" "0.11" "0.11" "0.04" "0.07" "0.07" "0.09" "0.04"
## TIR "0.11" "0.13" "0.15" "0.14" "0.14" "0.07" "0.09" "0.10" "0.13" "0.07"
## SER "0.22" "0.23" "0.25" "0.23" "0.23" "0.17" "0.19" "0.20" "0.24" "0.17"
## ALV "0.11" "0.12" "0.14" "0.13" "0.13" "0.07" "0.09" "0.10" "0.11" "0.07"
## CES "0.15" "0.21" "0.22" "0.21" "0.22" "0.16" "0.16" "0.13" "0.22" "0.13"
## ITR "0.09" "0.10" "0.12" "0.10" "0.10" "0.03" "0.07" "0.08" "0.07" "0.03"
## ROM "0.09" "0.11" "0.13" "0.11" "0.12" "0.04" "0.08" "0.08" "0.09" "0.05"
## SLO "0.06" "0.09" "0.11" "0.10" "0.10" "0.04" "0.05" "0.05" "0.09" "0.04"
## ITB "0.09" "0.13" "0.16" "0.14" "0.15" "0.08" "0.09" "0.08" "0.13" "0.05"
## BUL "0.10" "0.13" "0.15" "0.13" "0.14" "0.07" "0.09" "0.10" "0.12" "0.07"
## DES "0.10" "0.17" "0.18" "0.17" "0.18" "0.12" "0.11" "0.08" "0.17" "0.08"
## TRE "0.05" "0.10" "0.11" "0.10" "0.11" "0.05" "0.05" "0.04" "0.08" "0.03"
## BRE "0.12" "0.18" "0.19" "0.18" "0.19" "0.13" "0.13" "0.10" "0.18" "0.11"
## SIC "0.09" "0.12" "0.14" "0.12" "0.13" "0.06" "0.08" "0.08" "0.11" "0.05"
## ROS "0.10" "0.13" "0.15" "0.14" "0.14" "0.08" "0.09" "0.09" "0.13" "0.07"
## IMP "0.08" "0.12" "0.15" "0.13" "0.13" "0.06" "0.08" "0.07" "0.10" "0.05"
## GRA "0.10" "0.11" "0.13" "0.11" "0.12" "0.06" "0.08" "0.09" "0.10" "0.07"
## TUA "0.10" "0.12" "0.14" "0.12" "0.13" "0.05" "0.09" "0.10" "0.03" "0.06"
## FRS "0.07" "0.10" "0.12" "0.11" "0.11" "0.05" "0.06" "0.06" "0.10" "0.04"
## MAL "0.07" "0.10" "0.12" "0.10" "0.11" "0.04" "0.06" "0.06" "0.07" "0.03"
## STS "0.07" "0.11" "0.13" "0.11" "0.12" "0.05" "0.07" "0.06" "0.10" "0.04"
## GRC "0.10" "0.12" "0.14" "0.12" "0.12" "0.07" "0.08" "0.09" "0.11" "0.07"
## SEV "0.15" "0.05" "0.08" "0.05" "0.05" "0.12" "0.07" "0.14" "0.17" "0.12"
## ALU "0.11" "0.07" "0.09" "0.09" "0.08" "0.08" "0.06" "0.10" "0.13" "0.08"
## BAR "0.13" "0.14" "0.16" "0.14" "0.14" "0.04" "0.11" "0.12" "0.11" "0.09"
## SPM "0.02" "0.11" "0.13" "0.12" "0.12" "0.06" "0.06" "0.01" "0.06" "0.04"
## KER "0.10" "0.05" "0.07" "0.05" "0.05" "0.08" "0.04" "0.09" "0.13" "0.07"
## KRA "0.10" "0.01" "0.04" "0.02" "0.01" "0.08" "0.02" "0.09" "0.13" "0.07"
## SPC NA "0.12" "0.15" "0.13" "0.14" "0.08" "0.07" "0.01" "0.13" "0.05"
## SOC "3346" NA "0.06" "0.01" "0.02" "0.10" "0.04" "0.12" "0.15" "0.10"
## TIK "3358" "253" NA "0.05" "0.05" "0.12" "0.07" "0.14" "0.17" "0.12"
## GES "3445" "109" "315" NA "0.03" "0.11" "0.06" "0.12" "0.16" "0.11"
## RAR "3442" "187" "127" "210" NA "0.11" "0.05" "0.13" "0.16" "0.11"
## TUH "3519" "283" "508" "194" "398" NA "0.07" "0.08" "0.09" "0.05"
## ARM "3835" "538" "687" "429" "560" "316" NA "0.06" "0.12" "0.05"
## SPB "571" "3895" "3893" "3995" "3983" "4076" "4392" NA "0.13" "0.05"
## SPS "567" "3892" "3913" "3988" "3995" "4054" "4370" "332" NA "0.09"
## POP "702" "3906" "3885" "4010" "3981" "4104" "4417" "283" "615" NA
Make a table and save it as a word document
# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)
# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]
# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df))
ft
Population | CRO | ALD | ITP | TIR | SER | ALV | CES | ITR | ROM | SLO | ITB | BUL | DES | TRE | BRE | SIC | ROS | IMP | GRA | TUA | FRS | MAL | STS | GRC | SEV | ALU | BAR | SPM | KER | KRA | SPC | SOC | TIK | GES | RAR | TUH | ARM | SPB | SPS | POP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRO | 0.05 | 0.05 | 0.02 | 0.17 | 0.03 | 0.18 | 0.05 | 0.05 | 0.06 | 0.09 | 0.09 | 0.13 | 0.05 | 0.15 | 0.08 | 0.10 | 0.08 | 0.06 | 0.08 | 0.06 | 0.06 | 0.06 | 0.05 | 0.14 | 0.09 | 0.09 | 0.08 | 0.10 | 0.09 | 0.10 | 0.12 | 0.13 | 0.12 | 0.12 | 0.07 | 0.08 | 0.09 | 0.11 | 0.06 | |
ALD | 180 | 0.08 | 0.03 | 0.22 | 0.02 | 0.20 | 0.08 | 0.10 | 0.08 | 0.13 | 0.11 | 0.16 | 0.08 | 0.17 | 0.11 | 0.12 | 0.11 | 0.08 | 0.10 | 0.09 | 0.08 | 0.10 | 0.08 | 0.16 | 0.12 | 0.12 | 0.10 | 0.12 | 0.11 | 0.13 | 0.14 | 0.16 | 0.15 | 0.15 | 0.09 | 0.11 | 0.12 | 0.14 | 0.09 | |
ITP | 201 | 224 | 0.06 | 0.17 | 0.06 | 0.15 | 0.02 | 0.02 | 0.04 | 0.07 | 0.06 | 0.11 | 0.03 | 0.12 | 0.04 | 0.08 | 0.05 | 0.05 | 0.06 | 0.04 | 0.02 | 0.04 | 0.06 | 0.12 | 0.08 | 0.08 | 0.05 | 0.08 | 0.08 | 0.08 | 0.10 | 0.12 | 0.11 | 0.11 | 0.04 | 0.07 | 0.07 | 0.09 | 0.04 | |
TIR | 196 | 27 | 251 | 0.22 | 0.00 | 0.21 | 0.06 | 0.07 | 0.06 | 0.11 | 0.10 | 0.15 | 0.06 | 0.17 | 0.10 | 0.12 | 0.10 | 0.06 | 0.08 | 0.07 | 0.07 | 0.08 | 0.06 | 0.15 | 0.10 | 0.10 | 0.10 | 0.11 | 0.10 | 0.11 | 0.13 | 0.15 | 0.14 | 0.14 | 0.07 | 0.09 | 0.10 | 0.13 | 0.07 | |
SER | 322 | 442 | 520 | 439 | 0.18 | 0.28 | 0.16 | 0.21 | 0.15 | 0.22 | 0.19 | 0.23 | 0.15 | 0.25 | 0.19 | 0.20 | 0.22 | 0.19 | 0.20 | 0.16 | 0.16 | 0.17 | 0.19 | 0.25 | 0.20 | 0.22 | 0.21 | 0.20 | 0.20 | 0.22 | 0.23 | 0.25 | 0.23 | 0.23 | 0.17 | 0.19 | 0.20 | 0.24 | 0.17 | |
ALV | 260 | 93 | 235 | 99 | 534 | 0.18 | 0.06 | 0.07 | 0.06 | 0.10 | 0.09 | 0.14 | 0.06 | 0.16 | 0.09 | 0.10 | 0.08 | 0.06 | 0.08 | 0.07 | 0.06 | 0.07 | 0.05 | 0.14 | 0.10 | 0.10 | 0.08 | 0.10 | 0.10 | 0.11 | 0.12 | 0.14 | 0.13 | 0.13 | 0.07 | 0.09 | 0.10 | 0.11 | 0.07 | |
CES | 514 | 673 | 506 | 696 | 612 | 724 | 0.17 | 0.19 | 0.14 | 0.17 | 0.19 | 0.14 | 0.11 | 0.16 | 0.16 | 0.17 | 0.18 | 0.18 | 0.19 | 0.15 | 0.15 | 0.15 | 0.19 | 0.23 | 0.19 | 0.21 | 0.14 | 0.18 | 0.18 | 0.15 | 0.21 | 0.22 | 0.21 | 0.22 | 0.16 | 0.16 | 0.13 | 0.22 | 0.13 | |
ITR | 477 | 585 | 371 | 612 | 697 | 605 | 252 | 0.01 | 0.04 | 0.07 | 0.05 | 0.12 | 0.04 | 0.14 | 0.05 | 0.08 | 0.05 | 0.05 | 0.04 | 0.04 | 0.02 | 0.04 | 0.06 | 0.12 | 0.07 | 0.07 | 0.06 | 0.08 | 0.07 | 0.09 | 0.10 | 0.12 | 0.10 | 0.10 | 0.03 | 0.07 | 0.08 | 0.07 | 0.03 | |
ROM | 477 | 585 | 371 | 612 | 697 | 605 | 252 | 0 | 0.04 | 0.08 | 0.07 | 0.14 | 0.03 | 0.15 | 0.06 | 0.09 | 0.06 | 0.05 | 0.06 | 0.04 | 0.02 | 0.04 | 0.06 | 0.13 | 0.08 | 0.08 | 0.07 | 0.09 | 0.08 | 0.09 | 0.11 | 0.13 | 0.11 | 0.12 | 0.04 | 0.08 | 0.08 | 0.09 | 0.05 | |
SLO | 509 | 689 | 585 | 705 | 476 | 761 | 227 | 455 | 455 | 0.06 | 0.07 | 0.08 | 0.02 | 0.10 | 0.05 | 0.06 | 0.05 | 0.05 | 0.06 | 0.04 | 0.04 | 0.04 | 0.06 | 0.11 | 0.07 | 0.08 | 0.04 | 0.07 | 0.07 | 0.06 | 0.09 | 0.11 | 0.10 | 0.10 | 0.04 | 0.05 | 0.05 | 0.09 | 0.04 | |
ITB | 591 | 752 | 583 | 775 | 672 | 802 | 79 | 302 | 302 | 245 | 0.10 | 0.11 | 0.05 | 0.12 | 0.08 | 0.10 | 0.08 | 0.10 | 0.11 | 0.06 | 0.06 | 0.07 | 0.10 | 0.16 | 0.11 | 0.13 | 0.07 | 0.11 | 0.11 | 0.09 | 0.13 | 0.16 | 0.14 | 0.15 | 0.08 | 0.09 | 0.08 | 0.13 | 0.05 | |
BUL | 428 | 414 | 604 | 393 | 316 | 483 | 881 | 899 | 899 | 782 | 950 | 0.14 | 0.06 | 0.15 | 0.08 | 0.10 | 0.09 | 0.09 | 0.08 | 0.07 | 0.06 | 0.08 | 0.09 | 0.15 | 0.10 | 0.11 | 0.08 | 0.11 | 0.10 | 0.10 | 0.13 | 0.15 | 0.13 | 0.14 | 0.07 | 0.09 | 0.10 | 0.12 | 0.07 | |
DES | 691 | 859 | 702 | 881 | 725 | 915 | 198 | 427 | 427 | 255 | 126 | 1021 | 0.07 | 0.09 | 0.09 | 0.12 | 0.13 | 0.14 | 0.14 | 0.11 | 0.11 | 0.11 | 0.14 | 0.19 | 0.15 | 0.17 | 0.09 | 0.14 | 0.14 | 0.10 | 0.17 | 0.18 | 0.17 | 0.18 | 0.12 | 0.11 | 0.08 | 0.17 | 0.08 | |
TRE | 684 | 858 | 717 | 877 | 683 | 920 | 230 | 476 | 476 | 206 | 176 | 986 | 80 | 0.09 | 0.04 | 0.05 | 0.04 | 0.06 | 0.06 | 0.03 | 0.03 | 0.03 | 0.06 | 0.12 | 0.07 | 0.08 | 0.03 | 0.07 | 0.07 | 0.05 | 0.10 | 0.11 | 0.10 | 0.11 | 0.05 | 0.05 | 0.04 | 0.08 | 0.03 | |
BRE | 719 | 887 | 728 | 908 | 752 | 942 | 223 | 445 | 445 | 280 | 148 | 1048 | 28 | 92 | 0.12 | 0.14 | 0.13 | 0.15 | 0.16 | 0.12 | 0.12 | 0.12 | 0.15 | 0.20 | 0.16 | 0.18 | 0.11 | 0.15 | 0.15 | 0.12 | 0.18 | 0.19 | 0.18 | 0.19 | 0.13 | 0.13 | 0.10 | 0.18 | 0.11 | |
SIC | 537 | 480 | 340 | 502 | 858 | 420 | 715 | 483 | 483 | 865 | 779 | 894 | 905 | 945 | 925 | 0.09 | 0.07 | 0.08 | 0.08 | 0.06 | 0.05 | 0.06 | 0.09 | 0.14 | 0.10 | 0.10 | 0.06 | 0.10 | 0.09 | 0.09 | 0.12 | 0.14 | 0.12 | 0.13 | 0.06 | 0.08 | 0.08 | 0.11 | 0.05 | |
ROS | 683 | 771 | 884 | 761 | 367 | 859 | 917 | 1047 | 1047 | 726 | 961 | 445 | 977 | 915 | 1000 | 1219 | 0.09 | 0.10 | 0.10 | 0.07 | 0.07 | 0.08 | 0.10 | 0.16 | 0.11 | 0.13 | 0.09 | 0.11 | 0.11 | 0.10 | 0.13 | 0.15 | 0.14 | 0.14 | 0.08 | 0.09 | 0.09 | 0.13 | 0.07 | |
IMP | 840 | 984 | 787 | 1009 | 949 | 1020 | 341 | 428 | 428 | 507 | 277 | 1221 | 268 | 346 | 254 | 891 | 1232 | 0.08 | 0.08 | 0.05 | 0.04 | 0.05 | 0.09 | 0.15 | 0.09 | 0.11 | 0.07 | 0.09 | 0.09 | 0.08 | 0.12 | 0.15 | 0.13 | 0.13 | 0.06 | 0.08 | 0.07 | 0.10 | 0.05 | |
GRA | 711 | 534 | 705 | 515 | 884 | 476 | 1200 | 1070 | 1070 | 1220 | 1279 | 656 | 1390 | 1391 | 1417 | 736 | 1100 | 1492 | 0.07 | 0.07 | 0.05 | 0.07 | 0.02 | 0.13 | 0.08 | 0.09 | 0.08 | 0.09 | 0.08 | 0.10 | 0.11 | 0.13 | 0.11 | 0.12 | 0.06 | 0.08 | 0.09 | 0.10 | 0.07 | |
TUA | 851 | 694 | 901 | 669 | 932 | 667 | 1363 | 1271 | 1271 | 1340 | 1441 | 641 | 1542 | 1529 | 1569 | 994 | 1057 | 1678 | 277 | 0.07 | 0.05 | 0.07 | 0.08 | 0.14 | 0.09 | 0.06 | 0.08 | 0.10 | 0.09 | 0.10 | 0.12 | 0.14 | 0.12 | 0.13 | 0.05 | 0.09 | 0.10 | 0.03 | 0.06 | |
FRS | 1038 | 1192 | 1004 | 1216 | 1100 | 1234 | 525 | 654 | 654 | 629 | 448 | 1393 | 375 | 428 | 349 | 1119 | 1341 | 228 | 1709 | 1885 | 0.04 | 0.03 | 0.07 | 0.12 | 0.08 | 0.09 | 0.05 | 0.08 | 0.08 | 0.07 | 0.10 | 0.12 | 0.11 | 0.11 | 0.05 | 0.06 | 0.06 | 0.10 | 0.04 | |
MAL | 817 | 746 | 621 | 764 | 1139 | 674 | 943 | 694 | 694 | 1118 | 996 | 1157 | 1119 | 1170 | 1135 | 280 | 1498 | 1049 | 873 | 1148 | 1270 | 0.04 | 0.06 | 0.12 | 0.07 | 0.08 | 0.05 | 0.08 | 0.07 | 0.07 | 0.10 | 0.12 | 0.10 | 0.11 | 0.04 | 0.06 | 0.06 | 0.07 | 0.03 | |
STS | 1053 | 1230 | 1098 | 1248 | 989 | 1297 | 604 | 835 | 835 | 546 | 536 | 1304 | 410 | 381 | 390 | 1314 | 1125 | 528 | 1764 | 1886 | 412 | 1523 | 0.07 | 0.13 | 0.09 | 0.09 | 0.05 | 0.08 | 0.08 | 0.07 | 0.11 | 0.13 | 0.11 | 0.12 | 0.05 | 0.07 | 0.06 | 0.10 | 0.04 | |
GRC | 934 | 754 | 884 | 742 | 1141 | 679 | 1389 | 1224 | 1224 | 1440 | 1467 | 925 | 1585 | 1596 | 1611 | 812 | 1370 | 1653 | 270 | 445 | 1877 | 862 | 1975 | 0.13 | 0.09 | 0.10 | 0.08 | 0.10 | 0.09 | 0.10 | 0.12 | 0.14 | 0.12 | 0.12 | 0.07 | 0.08 | 0.09 | 0.11 | 0.07 | |
SEV | 1250 | 1196 | 1410 | 1170 | 1082 | 1235 | 1689 | 1723 | 1723 | 1551 | 1753 | 824 | 1804 | 1756 | 1830 | 1652 | 894 | 2029 | 1086 | 843 | 2180 | 1878 | 2012 | 1288 | 0.10 | 0.16 | 0.14 | 0.07 | 0.03 | 0.15 | 0.05 | 0.08 | 0.05 | 0.05 | 0.12 | 0.07 | 0.14 | 0.17 | 0.12 | |
ALU | 1321 | 1268 | 1482 | 1242 | 1149 | 1307 | 1757 | 1794 | 1794 | 1617 | 1821 | 895 | 1870 | 1821 | 1896 | 1724 | 950 | 2097 | 1151 | 904 | 2246 | 1948 | 2072 | 1348 | 72 | 0.11 | 0.09 | 0.06 | 0.05 | 0.11 | 0.07 | 0.09 | 0.09 | 0.08 | 0.08 | 0.06 | 0.10 | 0.13 | 0.08 | |
BAR | 1333 | 1446 | 1227 | 1473 | 1489 | 1458 | 878 | 862 | 862 | 1059 | 824 | 1742 | 815 | 887 | 796 | 1195 | 1783 | 552 | 1900 | 2123 | 512 | 1233 | 916 | 2007 | 2561 | 2631 | 0.10 | 0.12 | 0.11 | 0.13 | 0.14 | 0.16 | 0.14 | 0.14 | 0.04 | 0.11 | 0.12 | 0.11 | 0.09 | |
SPM | 1360 | 1450 | 1227 | 1478 | 1555 | 1448 | 956 | 883 | 883 | 1160 | 916 | 1782 | 933 | 1011 | 919 | 1136 | 1873 | 665 | 1864 | 2103 | 684 | 1128 | 1096 | 1944 | 2606 | 2677 | 211 | 0.09 | 0.08 | 0.02 | 0.11 | 0.13 | 0.12 | 0.12 | 0.06 | 0.06 | 0.01 | 0.06 | 0.04 | |
KER | 1490 | 1443 | 1655 | 1417 | 1301 | 1484 | 1913 | 1960 | 1960 | 1762 | 1973 | 1062 | 2017 | 1964 | 2042 | 1901 | 1073 | 2250 | 1327 | 1075 | 2391 | 2127 | 2197 | 1517 | 250 | 179 | 2789 | 2841 | 0.03 | 0.10 | 0.05 | 0.07 | 0.05 | 0.05 | 0.08 | 0.04 | 0.09 | 0.13 | 0.07 | |
KRA | 1687 | 1633 | 1848 | 1607 | 1506 | 1670 | 2117 | 2160 | 2160 | 1968 | 2178 | 1261 | 2222 | 2170 | 2248 | 2084 | 1276 | 2455 | 1478 | 1214 | 2596 | 2301 | 2401 | 1648 | 438 | 366 | 2992 | 3042 | 206 | 0.10 | 0.01 | 0.04 | 0.02 | 0.01 | 0.08 | 0.02 | 0.09 | 0.13 | 0.07 | |
SPC | 1601 | 1698 | 1474 | 1725 | 1781 | 1698 | 1173 | 1124 | 1124 | 1363 | 1125 | 2020 | 1122 | 1196 | 1104 | 1387 | 2086 | 857 | 2116 | 2354 | 818 | 1368 | 1214 | 2193 | 2843 | 2913 | 310 | 252 | 3075 | 3277 | 0.12 | 0.15 | 0.13 | 0.14 | 0.08 | 0.07 | 0.01 | 0.13 | 0.05 | |
SOC | 1748 | 1678 | 1898 | 1651 | 1591 | 1706 | 2197 | 2224 | 2224 | 2060 | 2262 | 1327 | 2313 | 2264 | 2339 | 2113 | 1388 | 2537 | 1470 | 1198 | 2689 | 2316 | 2512 | 1618 | 509 | 443 | 3068 | 3107 | 325 | 161 | 3346 | 0.06 | 0.01 | 0.02 | 0.10 | 0.04 | 0.12 | 0.15 | 0.10 | |
TIK | 1778 | 1733 | 1945 | 1707 | 1580 | 1774 | 2191 | 2247 | 2247 | 2033 | 2250 | 1350 | 2288 | 2233 | 2313 | 2190 | 1328 | 2527 | 1600 | 1339 | 2660 | 2414 | 2447 | 1776 | 539 | 467 | 3069 | 3126 | 290 | 132 | 3358 | 253 | 0.05 | 0.05 | 0.12 | 0.07 | 0.14 | 0.17 | 0.12 | |
GES | 1845 | 1770 | 1991 | 1743 | 1695 | 1795 | 2300 | 2321 | 2321 | 2166 | 2365 | 1426 | 2418 | 2370 | 2444 | 2197 | 1496 | 2640 | 1540 | 1265 | 2794 | 2395 | 2620 | 1675 | 614 | 550 | 3168 | 3204 | 434 | 257 | 3445 | 109 | 315 | 0.03 | 0.11 | 0.06 | 0.12 | 0.16 | 0.11 | |
RAR | 1853 | 1798 | 2014 | 1772 | 1670 | 1833 | 2281 | 2326 | 2326 | 2130 | 2342 | 1427 | 2385 | 2331 | 2410 | 2246 | 1432 | 2619 | 1629 | 1361 | 2758 | 2460 | 2555 | 1789 | 604 | 532 | 3157 | 3208 | 369 | 166 | 3442 | 187 | 127 | 210 | 0.11 | 0.05 | 0.13 | 0.16 | 0.11 | |
TUH | 1919 | 1828 | 2051 | 1801 | 1797 | 1844 | 2393 | 2395 | 2395 | 2272 | 2461 | 1512 | 2522 | 2478 | 2549 | 2233 | 1629 | 2734 | 1544 | 1267 | 2897 | 2412 | 2745 | 1650 | 734 | 679 | 3251 | 3275 | 597 | 443 | 3519 | 283 | 508 | 194 | 398 | 0.07 | 0.08 | 0.09 | 0.05 | |
ARM | 2235 | 2144 | 2367 | 2117 | 2107 | 2159 | 2706 | 2711 | 2711 | 2581 | 2773 | 1826 | 2832 | 2787 | 2859 | 2545 | 1923 | 3047 | 1848 | 1571 | 3207 | 2719 | 3045 | 1937 | 1034 | 974 | 3566 | 3591 | 862 | 674 | 3835 | 538 | 687 | 429 | 560 | 316 | 0.06 | 0.12 | 0.05 | |
SPB | 2158 | 2263 | 2040 | 2290 | 2313 | 2266 | 1702 | 1683 | 1683 | 1869 | 1644 | 2569 | 1617 | 1682 | 1595 | 1957 | 2594 | 1367 | 2687 | 2925 | 1264 | 1923 | 1601 | 2760 | 3388 | 3457 | 827 | 823 | 3614 | 3818 | 571 | 3895 | 3893 | 3995 | 3983 | 4076 | 4392 | 0.13 | 0.05 | |
SPS | 2144 | 2226 | 2002 | 2253 | 2341 | 2215 | 1737 | 1669 | 1669 | 1930 | 1690 | 2568 | 1689 | 1761 | 1670 | 1865 | 2651 | 1424 | 2601 | 2855 | 1371 | 1785 | 1750 | 2643 | 3393 | 3464 | 874 | 787 | 3628 | 3829 | 567 | 3892 | 3913 | 3988 | 3995 | 4054 | 4370 | 332 | 0.09 | |
POP | 2197 | 2320 | 2103 | 2347 | 2316 | 2335 | 1711 | 1734 | 1734 | 1852 | 1645 | 2591 | 1597 | 1653 | 1572 | 2065 | 2566 | 1370 | 2777 | 3001 | 1225 | 2067 | 1509 | 2876 | 3397 | 3464 | 878 | 939 | 3614 | 3819 | 702 | 3906 | 3885 | 4010 | 3981 | 4104 | 4417 | 283 | 615 |
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01 \
--keep-fam output/fst/pops_4fst.txt \
--make-bed \
--out output/fst/mantel01 \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel01.log
17028 variants loaded from .bim file. –keep-fam: 407 people remaining. Total genotyping rate in remaining samples is 0.970158. 17028 variants and 407 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/mantel01 \
--recodeA \
--out output/fst/mantel01 \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel01.log
17028 variants loaded from .bim file. 17028 variants and 407 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel01.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
Load it
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation
## 1 Franceville Gabon -1.59207 13.53242 Africa GAB
## 2 Antananarivo Madagascar -18.87920 47.50790 Africa ANT
## 3 Diego ville Madagascar -12.27361 49.29372 Africa DGV
## 4 Morondava Madagascar -20.28420 44.27940 Africa MAD
## 5 Vohimasy Madagascar -22.81591 47.75026 Africa VOH
## 6 Dauguet Mauritius -20.18530 57.52154 Africa DAU
## Year Region Subregion order
## 1 2015 Central Africa Africa 72
## 2 2022 East Africa East Africa 76
## 3 2022 East Africa East Africa 77
## 4 2016 East Africa East Africa 78
## 5 2016 & 2017 East Africa East Africa 79
## 6 2022 Indian Ocean Indian Ocean 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel01.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# # Replace the BEN a with BEN b (remember to never name samples with the same ID... I change it manually in the fam file.)
# fam_data <- fam_data %>%
# mutate(IndividualID = ifelse(FamilyID == "BEN" & IndividualID == "a", "b", IndividualID))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 SOC 1065 0 0 0 -9 Sochi Russia
## 2 SOC 1066 0 0 0 -9 Sochi Russia
## 3 SOC 1067 0 0 0 -9 Sochi Russia
## 4 SOC 1068 0 0 0 -9 Sochi Russia
## 5 SOC 1069 0 0 0 -9 Sochi Russia
## 6 SOC 1070 0 0 0 -9 Sochi Russia
## Latitude Longitude Continent Year Region Subregion order
## 1 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 2 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 3 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 4 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 5 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
## 6 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 38
Get the latitude and longitude
## [,1] [,2]
## [1,] 43.60042 39.74533
## [2,] 43.60042 39.74533
## [3,] 43.60042 39.74533
## [4,] 43.60042 39.74533
## [5,] 43.60042 39.74533
## [6,] 43.60042 39.74533
Add jitter
## x y
## [1,] 43.59886 39.74551
## [2,] 43.59906 39.74661
## [3,] 43.60204 39.74453
## [4,] 43.60060 39.74656
## [5,] 43.60100 39.74589
## [6,] 43.59957 39.74666
Add to object
Save
# Calculate distances
# Genetic
Dgen <- dist(albo2$tab)
# Physical
Dgeo <- dist(other(albo2)$xy)
# Run the Mantel test is a statistical test to compare two matrices and check if they are correlated. In this context, it's used to check if genetic distance (Dgen) correlates with geographic distance (Dgeo). The .randtest variant performs a randomized version of the test to estimate the significance of the observed correlation.
ibd <- mantel.randtest(Dgen,Dgeo)
# Check it
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: 0.36077
##
## Based on 999 replicates
## Simulated p-value: 0.001
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 2.845410e+01 4.867888e-04 1.603240e-04
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "r_01", "simIBD_europe_r01_LD2.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Add the equation
# Plotting the data
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
# You can adjust the position (e.g., x and y values) as necessary
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.7, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.65, labels = r2_label)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "r_01", "Genetic_v_Geog_distance_europe_r01_LD2.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
dev.off()
Use library MASS for plot
CairoPDF(here(
"output", "europe", "fst", "r_01", "IDB_PlotFromMASS_europe_r01_density.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
dev.off()
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.5, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.45, labels = r2_label)
title("Isolation by distance")
Check the populations - I did not include those with less than 4 mosquitoes
## ALD ALU ALV ARM BAR BRE BUL CES CRO DES FRS GES GRA GRC IMP ITB ITP ITR KER KRA
## 10 12 12 10 12 13 10 14 12 16 12 12 11 10 4 5 8 12 12 12
## MAL POP RAR ROM ROS SER SEV SIC SLO SOC SPB SPC SPM SPS STS TIK TIR TRE TUA TUH
## 12 12 12 4 11 4 12 9 12 12 8 6 5 8 12 12 4 12 9 12
Create list of populations
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
awk '{print $1}' output/snps_sets/r2_0.01_b.fam | sort | uniq -c | awk '{print $2, $1}' | awk '$2 >= 4 {print}' | awk '{print $1}' > output/fst/pops_4bfst.txt;
head output/fst/pops_4bfst.txt;
wc -l output/fst/pops_4bfst.txt
## ALD
## ALU
## ALV
## ARM
## BAR
## BRE
## BUL
## CES
## CRO
## DES
## 40 output/fst/pops_4bfst.txt
We have 40 populations with 4 or more mosquitoes.
First load plink
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/pops_4bfst.txt \
--recodeA \
--out output/fst/r2_0.01_b \
--silent;
grep 'samples\|variants\|remaining' output/fst/r2_0.01_b.log
20968 variants loaded from .bim file. –keep-fam: 408 people remaining. Total genotyping rate in remaining samples is 0.971028. 20968 variants and 408 people pass filters and QC.
Look at https://rdrr.io/cran/StAMPP/man/stamppFst.html for details of Fst estimations
LD2 <-
read.PLINK(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/r2_0.01_b.raw"
),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Length Class Mode
## 1 genlight S4
The command below would also work, but you can simplify it and put only the numbers: genome_equal_2 <- stamppFst(neutral, nboots=100, percent=95 + nclusters==10)
This chunk will take a couple minutes to run.
# run stampp. If you want to run with bootstraps and nclusters use the HPC. It will run out of memory on a 32Gb laptop
LD2_3 <- stamppFst(LD2_2, 1, 95, 1)
Save it
To load it
Now lets look at the object
## SOC SEV GES KER
## Min. :0.007368 Min. :0.02984 Min. :0.01731 Min. :0.03114
## 1st Qu.:0.095683 1st Qu.:0.11672 1st Qu.:0.10258 1st Qu.:0.07804
## Median :0.114950 Median :0.13445 Median :0.12082 Median :0.09649
## Mean :0.108035 Mean :0.13179 Mean :0.11816 Mean :0.09821
## 3rd Qu.:0.128378 3rd Qu.:0.15304 3rd Qu.:0.13515 3rd Qu.:0.10979
## Max. :0.226722 Max. :0.24859 Max. :0.23290 Max. :0.20445
## NA's :1 NA's :2 NA's :3 NA's :4
## KRA TIK RAR TRE
## Min. :0.01126 Min. :0.05318 Min. :0.05336 Min. :0.02292
## 1st Qu.:0.07449 1st Qu.:0.12048 1st Qu.:0.11031 1st Qu.:0.03722
## Median :0.09153 Median :0.13660 Median :0.12626 Median :0.05341
## Mean :0.09234 Mean :0.13868 Mean :0.13086 Mean :0.05661
## 3rd Qu.:0.10511 3rd Qu.:0.15285 3rd Qu.:0.14308 3rd Qu.:0.06398
## Max. :0.19658 Max. :0.25029 Max. :0.23530 Max. :0.15403
## NA's :5 NA's :6 NA's :7 NA's :8
## ALU STS SIC BRE
## Min. :0.06494 Min. :0.03010 Min. :0.04422 Min. :0.08617
## 1st Qu.:0.07964 1st Qu.:0.05092 1st Qu.:0.06364 1st Qu.:0.12019
## Median :0.09507 Median :0.06948 Median :0.08073 Median :0.13841
## Mean :0.10215 Mean :0.07361 Mean :0.08448 Mean :0.14183
## 3rd Qu.:0.10886 3rd Qu.:0.07770 3rd Qu.:0.09281 3rd Qu.:0.15641
## Max. :0.19797 Max. :0.17530 Max. :0.19317 Max. :0.24879
## NA's :9 NA's :10 NA's :11 NA's :12
## DES CES TIR IMP
## Min. :0.07761 Min. :0.1316 Min. :-0.000934 Min. :0.04352
## 1st Qu.:0.10740 1st Qu.:0.1527 1st Qu.: 0.062847 1st Qu.:0.05687
## Median :0.12503 Median :0.1755 Median : 0.073205 Median :0.08116
## Mean :0.12773 Mean :0.1763 Mean : 0.083119 Mean :0.08135
## 3rd Qu.:0.14069 3rd Qu.:0.1882 3rd Qu.: 0.104343 3rd Qu.:0.08647
## Max. :0.22839 Max. :0.2844 Max. : 0.222903 Max. :0.22637
## NA's :13 NA's :14 NA's :15 NA's :16
## ROM GRC BAR BUL
## Min. :0.006012 Min. :0.02418 Min. :0.04376 Min. :0.05357
## 1st Qu.:0.044731 1st Qu.:0.06061 1st Qu.:0.08101 1st Qu.:0.06767
## Median :0.066024 Median :0.07681 Median :0.09852 Median :0.09179
## Mean :0.067477 Mean :0.08195 Mean :0.10274 Mean :0.09108
## 3rd Qu.:0.082312 3rd Qu.:0.09720 3rd Qu.:0.12083 3rd Qu.:0.10032
## Max. :0.207815 Max. :0.18931 Max. :0.21778 Max. :0.19614
## NA's :17 NA's :18 NA's :19 NA's :20
## CRO GRA ITB MAL
## Min. :0.02773 Min. :0.04970 Min. :0.05193 Min. :0.01802
## 1st Qu.:0.05615 1st Qu.:0.05852 1st Qu.:0.06888 1st Qu.:0.03876
## Median :0.06531 Median :0.07608 Median :0.08580 Median :0.06135
## Mean :0.07608 Mean :0.08161 Mean :0.09276 Mean :0.05974
## 3rd Qu.:0.09071 3rd Qu.:0.09589 3rd Qu.:0.10242 3rd Qu.:0.07366
## Max. :0.17329 Max. :0.18592 Max. :0.21971 Max. :0.16130
## NA's :21 NA's :22 NA's :23 NA's :24
## SPM TUA TUH ALD
## Min. :0.006262 Min. :0.02844 Min. :0.03486 Min. :0.02204
## 1st Qu.:0.056700 1st Qu.:0.05760 1st Qu.:0.04529 1st Qu.:0.08084
## Median :0.071302 Median :0.07320 Median :0.06970 Median :0.09978
## Mean :0.077542 Mean :0.08055 Mean :0.07255 Mean :0.10620
## 3rd Qu.:0.093710 3rd Qu.:0.09974 3rd Qu.:0.08458 3rd Qu.:0.12480
## Max. :0.213896 Max. :0.19864 Max. :0.17311 Max. :0.21576
## NA's :25 NA's :26 NA's :27 NA's :28
## FRS ITP POP ROS
## Min. :0.03937 Min. :0.02323 Min. :0.03491 Min. :0.05906
## 1st Qu.:0.04234 1st Qu.:0.04179 1st Qu.:0.05167 1st Qu.:0.08919
## Median :0.06316 Median :0.06778 Median :0.05496 Median :0.09704
## Mean :0.06904 Mean :0.07107 Mean :0.07007 Mean :0.10790
## 3rd Qu.:0.07371 3rd Qu.:0.07952 3rd Qu.:0.07260 3rd Qu.:0.10967
## Max. :0.16404 Max. :0.17461 Max. :0.16508 Max. :0.20308
## NA's :29 NA's :30 NA's :31 NA's :32
## SER SLO SPC SPB
## Min. :0.1529 Min. :0.04127 Min. :0.00711 Min. :0.06086
## 1st Qu.:0.1745 1st Qu.:0.05380 1st Qu.:0.07035 1st Qu.:0.07552
## Median :0.1902 Median :0.05741 Median :0.08783 Median :0.08804
## Mean :0.1930 Mean :0.06074 Mean :0.08176 Mean :0.09127
## 3rd Qu.:0.2100 3rd Qu.:0.06194 3rd Qu.:0.10685 3rd Qu.:0.10379
## Max. :0.2386 Max. :0.09230 Max. :0.13664 Max. :0.12813
## NA's :33 NA's :34 NA's :35 NA's :36
## SPS ARM ALV ITR
## Min. :0.07152 Min. :0.06985 Min. :0.05703 Min. : NA
## 1st Qu.:0.09248 1st Qu.:0.07432 1st Qu.:0.05703 1st Qu.: NA
## Median :0.11345 Median :0.07880 Median :0.05703 Median : NA
## Mean :0.10059 Mean :0.07880 Mean :0.05703 Mean :NaN
## 3rd Qu.:0.11513 3rd Qu.:0.08328 3rd Qu.:0.05703 3rd Qu.: NA
## Max. :0.11682 Max. :0.08776 Max. :0.05703 Max. : NA
## NA's :37 NA's :38 NA's :39 NA's :40
If you want you can save the fst values as csv.
# Save it
write.csv(LD2_df, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/LD2_df_MAF1.csv"))
Check the Fst values
## SOC SEV GES KER KRA TIK RAR TRE ALU STS
## SOC NA NA NA NA NA NA NA NA NA NA
## SEV 0.044764659 NA NA NA NA NA NA NA NA NA
## GES 0.011269778 0.05366015 NA NA NA NA NA NA NA NA
## KER 0.046807291 0.06763399 0.05416414 NA NA NA NA NA NA NA
## KRA 0.007367688 0.02983546 0.01730859 0.03114523 NA NA NA NA NA NA
## TIK 0.057321524 0.07850480 0.05231734 0.07125463 0.0352695 NA NA NA NA NA
## SIC BRE DES CES TIR IMP ROM GRC BAR BUL CRO GRA ITB MAL SPM TUA TUH ALD FRS
## SOC NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## SEV NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## KER NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## KRA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## TIK NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## ITP POP ROS SER SLO SPC SPB SPS ARM ALV ITR
## SOC NA NA NA NA NA NA NA NA NA NA NA
## SEV NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA
## KER NA NA NA NA NA NA NA NA NA NA NA
## KRA NA NA NA NA NA NA NA NA NA NA NA
## TIK NA NA NA NA NA NA NA NA NA NA NA
Convert the data into a matrix.
## SOC SEV GES KER KRA TIK
## SOC NA 0.04476466 0.01126978 0.04680729 0.007367688 0.05732152
## SEV 0.044764659 NA 0.05366015 0.06763399 0.029835464 0.07850480
## GES 0.011269778 0.05366015 NA 0.05416414 0.017308587 0.05231734
## KER 0.046807291 0.06763399 0.05416414 NA 0.031145234 0.07125463
## KRA 0.007367688 0.02983546 0.01730859 0.03114523 NA 0.03526950
## TIK 0.057321524 0.07850480 0.05231734 0.07125463 0.035269501 NA
## RAR TRE ALU STS SIC BRE DES
## SOC 0.02162356 0.09677667 0.07010602 0.10591967 0.11784326 0.1783248 0.1653535
## SEV 0.05301801 0.11534477 0.09442672 0.12611348 0.14011854 0.1957788 0.1837507
## GES 0.02837824 0.10102216 0.08513146 0.10927914 0.12290051 0.1813135 0.1691206
## KER 0.05279426 0.07229226 0.06560035 0.08193350 0.09777071 0.1534374 0.1390736
## KRA 0.01126164 0.07169993 0.05454288 0.08239319 0.09220277 0.1519838 0.1397541
## TIK 0.05317902 0.11330313 0.09008958 0.12334852 0.13737947 0.1945549 0.1803734
## CES TIR IMP ROM GRC BAR BUL
## SOC 0.2093281 0.1284323 0.12099490 0.10667493 0.11671692 0.1360204 0.1283236
## SEV 0.2267463 0.1529011 0.14651785 0.13206617 0.13376653 0.1575244 0.1491057
## GES 0.2119834 0.1351532 0.12748623 0.11182282 0.12208159 0.1427050 0.1338410
## KER 0.1814383 0.1104057 0.09219116 0.08795374 0.10017423 0.1237427 0.1095903
## KRA 0.1823634 0.1017700 0.09264869 0.08159695 0.09017657 0.1108940 0.1032033
## TIK 0.2237019 0.1538818 0.14536390 0.13186905 0.13432670 0.1539908 0.1478755
## CRO GRA ITB MAL SPM TUA TUH
## SOC 0.1149502 0.10889177 0.1337168 0.09458982 0.12205750 0.11826743 0.10435177
## SEV 0.1351410 0.12765441 0.1570420 0.11656423 0.14762503 0.13848504 0.12249588
## GES 0.1203672 0.11376844 0.1389219 0.10125102 0.12971049 0.12401729 0.11137798
## KER 0.0996719 0.09520248 0.1085958 0.07559881 0.09938746 0.10311247 0.08539267
## KRA 0.0918335 0.08437280 0.1070244 0.07285346 0.09393310 0.09124916 0.07816345
## TIK 0.1325973 0.12705980 0.1559944 0.11459438 0.14348939 0.13653641 0.12161918
## ALD FRS ITP POP ROS SER SLO
## SOC 0.1393253 0.10279814 0.10070244 0.09884633 0.1342376 0.2267217 0.09140753
## SEV 0.1592704 0.12144167 0.12209602 0.11938424 0.1537391 0.2485867 0.10967112
## GES 0.1445257 0.10779885 0.10598420 0.10461590 0.1391201 0.2329000 0.09665973
## KER 0.1249993 0.07884778 0.08100376 0.07324363 0.1106383 0.2044508 0.07291814
## KRA 0.1135568 0.07658576 0.07497710 0.07400209 0.1075813 0.1965758 0.06479207
## TIK 0.1586231 0.12035512 0.12086377 0.11777636 0.1497754 0.2502909 0.10749679
## SPC SPB SPS ARM ALV ITR
## SOC 0.12576249 0.11561471 0.1508072 0.04507389 0.11809185 0.09717221
## SEV 0.15308239 0.14034768 0.1720481 0.07105325 0.13842989 0.11718572
## GES 0.13312607 0.12082033 0.1581405 0.05626819 0.12396762 0.10257979
## KER 0.10263316 0.09346009 0.1332609 0.04022032 0.10266884 0.07947360
## KRA 0.09933254 0.09153356 0.1250888 0.01891726 0.09497027 0.07282745
## TIK 0.14746567 0.13712217 0.1701660 0.06736327 0.13665857 0.11595669
Import sample locations
sampling_loc <- readRDS(here("output", "sampling_loc_euro_global.rds"))
# Arrange by region
sampling_loc <- sampling_loc |>
dplyr::arrange(
order
)
# Check it
head(sampling_loc)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
Order
## [1] "BER" "COL" "PAL" "HOU" "LOS" "MAU" "REC" "GRV" "FRS" "STS" "POP" "POL"
## [13] "SPB" "SPS" "SPC" "BAR" "SPM" "IMP" "ITG" "BRE" "DES" "TRE" "ITB" "CES"
## [25] "ROM" "ITR" "SIC" "ITP" "MAL" "SLO" "CRO" "ALV" "ALD" "TIR" "SER" "GRA"
## [37] "GRC" "ROS" "BUL" "TUA" "TUH" "SEV" "ALU" "KER" "KRA" "SOC" "TIK" "RAR"
## [49] "GES" "ARM" "KAN" "UTS" "KAG" "OKI" "HAI" "YUN" "HUN" "TAI" "GEL" "BEN"
## [61] "KUN" "KAT" "JAF" "CAM" "SUF" "SUU" "INW" "INJ" "KLP" "MAT" "SSK" "KAC"
## [73] "SON" "CHA" "LAM" "HAN" "HOC" "QNC" "LIB" "MAD" "TRO" "DAU" "JAT" "YAT"
## [85] "GAB" "ANT" "DGV" "VOH" "RAB" "YAO" "AWK" "BRM" "JAM" "SAI" "BEA" "CHI"
## [97] "DAL" "FAY" "MAC" "MAN" "NEO" "NEW" "NUE" "PEO" "RUS" "SPR" "POR" "NOV"
## [109] "TUC" "MED" "AIZ" "HIR" "KHO" "KYO" "NAG" "NIG" "SAG" "SAK" "SEN" "TAN"
LD2_df_ordered <- read_csv(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/LD2_df_odered.csv"
)
## New names:
## Rows: 40 Columns: 41
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (40): FRS, STS, POP, SPB, SPS, SPC, BAR, SPM, IMP, BRE, DES, TRE,
## ITB, C...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
Below doesnt change order Create vector with order of populations
# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df_ordered)
# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]
#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it
# Print the reordered populations
print(poporder)
## [1] "FRS" "STS" "POP" "SPB" "SPS" "SPC" "BAR" "SPM" "IMP" "BRE" "DES" "TRE"
## [13] "ITB" "CES" "ROM" "ITR" "SIC" "ITP" "MAL" "SLO" "CRO" "ALV" "ALD" "TIR"
## [25] "SER" "GRA" "GRC" "ROS" "BUL" "TUA" "TUH" "SEV" "ALU" "KER" "KRA" "SOC"
## [37] "TIK" "RAR" "GES" "ARM"
Lets check if the matrix is symmetric.
## [1] TRUE
Order the matrix using poporder. We will also add NA on the upper left side of the matrix.
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## FRS : 40 FRS : 40 Min. :-0.0009
## STS : 40 STS : 40 1st Qu.: 0.0668
## POP : 40 POP : 40 Median : 0.0946
## SPB : 40 SPB : 40 Mean : 0.1004
## SPS : 40 SPS : 40 3rd Qu.: 0.1261
## SPC : 40 SPC : 40 Max. : 0.2844
## (Other):1360 (Other):1360 NA's :820
Now lets plot the data with ggplot. You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f <- ggplot(pairfst.long, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 2) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 0)
)
pairfst.f
Save it
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
# Step 2: Calculate mean Fst for each pair of countries
# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(LD2_df))
fst_df$Abbreviation1 <- rownames(fst_df)
# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)
# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))
# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>%
group_by(Country_1, Country_2) %>%
summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>%
filter(Country_1 != Country_2)
#save the fst values as csv
write.csv(fst_summary, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/fst_summary_MAF1.csv"))
fst_summary_ordered <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/fst_summary_ordered_MAF1.txt")
#fst_summary_ordered <- data.frame(fst_summary_ordered)
fst_summary_ordered <- read.table(fst_summary_ordered,
header=TRUE)
# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary_ordered, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)
# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)
for(i in 1:nrow(fst_matrix_summary)) {
for(j in i:nrow(fst_matrix_summary)) {
if (i == j) {
symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
} else {
avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
symmetric_fst_matrix[i, j] <- avg_value
symmetric_fst_matrix[j, i] <- avg_value
}
}
}
# Check if the matrix is symmetric
# print(isSymmetric(symmetric_fst_matrix))
# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
## Albania Armenia Bulgaria Croatia France Georgia
## Albania NA 0.09394606 0.10094454 0.02819142 0.08151350 0.13454883
## Armenia 0.09394606 NA 0.09585657 0.08475669 0.06544647 0.05626819
## Bulgaria 0.10094454 0.09585657 NA 0.08722725 0.07492144 0.13384104
## Croatia 0.02819142 0.08475669 0.08722725 NA 0.06302575 0.12036716
## France 0.08151350 0.06544647 0.07492144 0.06302575 NA 0.10853899
## Georgia 0.13454883 0.05626819 0.13384104 0.12036716 0.10853899 NA
## Greece 0.06201272 0.08205863 0.09300545 0.05544159 0.07064597 0.11792501
## Italy 0.09997730 0.08203692 0.09055454 0.08407101 0.07093337 0.13731351
## Malta 0.06886598 0.06332318 0.05679897 0.05500955 0.03897441 0.10125102
## Portugal 0.07546730 0.05495953 0.06636208 0.06402375 0.04227483 0.10461590
## Romania 0.11103043 0.09451032 0.10293539 0.09925514 0.07490661 0.13912009
## Russia 0.12964785 0.04617807 0.12924704 0.11541609 0.10455781 0.02196892
## Serbia 0.20173826 0.19022765 0.19614135 0.17328819 0.16966823 0.23289996
## Slovenia 0.06612389 0.05361430 0.06530144 0.05729126 0.04390333 0.09665973
## Spain 0.11175243 0.08458340 0.10696192 0.09537204 0.07523979 0.13690047
## Turkey 0.07899673 0.07867139 0.07568412 0.07012296 0.05986011 0.11769763
## Ukraine 0.12249231 0.05873850 0.12119872 0.10928968 0.09552548 0.06165397
## Greece Italy Malta Portugal Romania Russia
## Albania 0.06201272 0.09997730 0.06886598 0.07546730 0.11103043 0.12964785
## Armenia 0.08205863 0.08203692 0.06332318 0.05495953 0.09451032 0.04617807
## Bulgaria 0.09300545 0.09055454 0.05679897 0.06636208 0.10293539 0.12924704
## Croatia 0.05544159 0.08407101 0.05500955 0.06402375 0.09925514 0.11541609
## France 0.07064597 0.07093337 0.03897441 0.04227483 0.07490661 0.10455781
## Georgia 0.11792501 0.13731351 0.10125102 0.10461590 0.13912009 0.02196892
## Greece NA 0.08917035 0.05472746 0.06798589 0.10150763 0.11254358
## Italy 0.08917035 NA 0.04641674 0.04923588 0.09169809 0.13335761
## Malta 0.05472746 0.04641674 NA 0.02975032 0.07357926 0.09679329
## Portugal 0.06798589 0.04923588 0.02975032 NA 0.07259454 0.10008283
## Romania 0.10150763 0.09169809 0.07357926 0.07259454 NA 0.13366776
## Russia 0.11254358 0.13335761 0.09679329 0.10008283 0.13366776 NA
## Serbia 0.18761510 0.19008007 0.16129723 0.16508121 0.20308218 0.22722156
## Slovenia 0.05726520 0.05241481 0.03916767 0.03586147 0.05906033 0.09065635
## Spain 0.09579872 0.09298149 0.07372547 0.06801861 0.10896586 0.13140191
## Turkey 0.07154177 0.06994499 0.04497299 0.05514368 0.09174178 0.11102150
## Ukraine 0.10496079 0.09975371 0.08753557 0.09026470 0.12570020 0.05816269
## Serbia Slovenia Spain Turkey Ukraine
## Albania 0.2017383 0.06612389 0.11175243 0.07899673 0.12249231
## Armenia 0.1902276 0.05361430 0.08458340 0.07867139 0.05873850
## Bulgaria 0.1961414 0.06530144 0.10696192 0.07568412 0.12119872
## Croatia 0.1732882 0.05729126 0.09537204 0.07012296 0.10928968
## France 0.1696682 0.04390333 0.07523979 0.05986011 0.09552548
## Georgia 0.2329000 0.09665973 0.13690047 0.11769763 0.06165397
## Greece 0.1876151 0.05726520 0.09579872 0.07154177 0.10496079
## Italy 0.1900801 0.05241481 0.09298149 0.06994499 0.09975371
## Malta 0.1612972 0.03916767 0.07372547 0.04497299 0.08753557
## Portugal 0.1650812 0.03586147 0.06801861 0.05514368 0.09026470
## Romania 0.2030822 0.05906033 0.10896586 0.09174178 0.12570020
## Russia 0.2272216 0.09065635 0.13140191 0.11102150 0.05816269
## Serbia NA 0.15287729 0.21769108 0.18587606 0.21700387
## Slovenia 0.1528773 NA 0.06748962 0.05168875 0.08308108
## Spain 0.2176911 0.06748962 NA 0.07429569 0.12408194
## Turkey 0.1858761 0.05168875 0.07429569 NA 0.10348110
## Ukraine 0.2170039 0.08308108 0.12408194 0.10348110 NA
# Read the file
country_order <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/country_order.txt")
country_order <- read.table(country_order,
header = FALSE,
col.names = c("country"))
order_countries <- as.vector(country_order$country)
order_countries
## [1] "France" "Portugal" "Spain" "Italy" "Malta" "Slovenia"
## [7] "Croatia" "Albania" "Serbia" "Greece" "Romania" "Bulgaria"
## [13] "Turkey" "Ukraine" "Russia" "Georgia" "Armenia"
write.csv(symmetric_fst_matrix, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/symmetric_fst_matrix_MAF1.csv"))
symmetric_fst_matrix_ordered <- read_csv(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/symmetric_fst_matrix_ordered_MAF1.csv"
)
## New names:
## Rows: 17 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (17): France, Portugal, Spain, Italy, Malta, Slovenia, Croatia,
## Albania,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
symmetric_fst_matrix_ordered<- as.data.frame(symmetric_fst_matrix_ordered) #convert to dataframe
rownames(symmetric_fst_matrix_ordered) <- symmetric_fst_matrix_ordered$...1 #use first column as rownames
symmetric_fst_matrix_ordered <- symmetric_fst_matrix_ordered[ -c(1) ] #remove 1st column
symmetric_fst_matrix_ordered <-as.matrix(symmetric_fst_matrix_ordered) #convert to matrix
symmetric_fst_matrix_ordered[lower.tri(symmetric_fst_matrix_ordered)] <- NA
print(symmetric_fst_matrix_ordered)
## France Portugal Spain Italy Malta Slovenia
## France NA 0.04227483 0.07523979 0.07093337 0.03897441 0.04390333
## Portugal NA NA 0.06801861 0.04923588 0.02975032 0.03586147
## Spain NA NA NA 0.09298149 0.07372547 0.06748962
## Italy NA NA NA NA 0.04641674 0.05241481
## Malta NA NA NA NA NA 0.03916767
## Slovenia NA NA NA NA NA NA
## Croatia NA NA NA NA NA NA
## Albania NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA
## Greece NA NA NA NA NA NA
## Romania NA NA NA NA NA NA
## Bulgaria NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Ukraine NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Georgia NA NA NA NA NA NA
## Armenia NA NA NA NA NA NA
## Croatia Albania Serbia Greece Romania Bulgaria
## France 0.06302575 0.08151350 0.1696682 0.07064597 0.07490661 0.07492144
## Portugal 0.06402375 0.07546730 0.1650812 0.06798589 0.07259454 0.06636208
## Spain 0.09537205 0.11175243 0.2176911 0.09579872 0.10896586 0.10696192
## Italy 0.08407101 0.09997730 0.1900801 0.08917035 0.09169809 0.09055454
## Malta 0.05500955 0.06886598 0.1612972 0.05472746 0.07357926 0.05679897
## Slovenia 0.05729126 0.06612389 0.1528773 0.05726520 0.05906033 0.06530144
## Croatia NA 0.02819142 0.1732882 0.05544159 0.09925514 0.08722725
## Albania NA NA 0.2017383 0.06201272 0.11103043 0.10094454
## Serbia NA NA NA 0.18761510 0.20308218 0.19614135
## Greece NA NA NA NA 0.10150763 0.09300545
## Romania NA NA NA NA NA 0.10293539
## Bulgaria NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Ukraine NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Georgia NA NA NA NA NA NA
## Armenia NA NA NA NA NA NA
## Turkey Ukraine Russia Georgia Armenia
## France 0.05986011 0.09552548 0.10455781 0.10853899 0.06544647
## Portugal 0.05514368 0.09026470 0.10008283 0.10461590 0.05495953
## Spain 0.07429569 0.12408194 0.13140191 0.13690047 0.08458340
## Italy 0.06994499 0.09975371 0.13335761 0.13731351 0.08203692
## Malta 0.04497299 0.08753557 0.09679329 0.10125102 0.06332318
## Slovenia 0.05168875 0.08308108 0.09065635 0.09665973 0.05361430
## Croatia 0.07012296 0.10928968 0.11541609 0.12036716 0.08475669
## Albania 0.07899673 0.12249231 0.12964785 0.13454883 0.09394606
## Serbia 0.18587606 0.21700387 0.22722156 0.23289996 0.19022765
## Greece 0.07154177 0.10496079 0.11254358 0.11792501 0.08205863
## Romania 0.09174178 0.12570020 0.13366776 0.13912009 0.09451032
## Bulgaria 0.07568412 0.12119872 0.12924704 0.13384104 0.09585657
## Turkey NA 0.10348110 0.11102150 0.11769763 0.07867139
## Ukraine NA NA 0.05816269 0.06165397 0.05873850
## Russia NA NA NA 0.02196892 0.04617807
## Georgia NA NA NA NA 0.05626819
## Armenia NA NA NA NA NA
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## France : 17 France : 17 Min. :0.02197
## Portugal: 17 Portugal: 17 1st Qu.:0.06541
## Spain : 17 Spain : 17 Median :0.09041
## Italy : 17 Italy : 17 Mean :0.09651
## Malta : 17 Malta : 17 3rd Qu.:0.11326
## Slovenia: 17 Slovenia: 17 Max. :0.23290
## (Other) :187 (Other) :187 NA's :153
You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f2 <- ggplot(pairfst.long2, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 3) +
theme(
axis.text.x = element_text(angle = 90, hjust = 0),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 1)
)
pairfst.f2
ggsave(
filename = here("output", "europe", "fst", "fst_matrix_Europe_by_country_ordered_MAF1.pdf"),
pairfst.f2,
width = 6,
height = 5,
units = "in"
)
Remove NAs and rename columns
# remove NAs
fst2 <-
pairfst.long |>
drop_na()
# rename columns
fst2 <-
fst2 |>
dplyr::rename(pop1 = 1,
pop2 = 2,
fst = 3)
# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
dplyr::select(pop = pop2, fst)
# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)
# Calculate the mean fst for each population
mean_fst <- df_combined |>
group_by(pop) |>
summarise(mean_fst = mean(fst))
print(mean_fst)
## # A tibble: 40 × 2
## pop mean_fst
## <fct> <dbl>
## 1 FRS 0.0754
## 2 STS 0.0787
## 3 POP 0.0711
## 4 SPB 0.0903
## 5 SPS 0.122
## 6 SPC 0.100
## 7 BAR 0.114
## 8 SPM 0.0916
## 9 IMP 0.0923
## 10 BRE 0.146
## # ℹ 30 more rows
Merge
fst3 <-
sampling_loc |>
left_join(
mean_fst,
by = c("Abbreviation" = "pop")
) |>
drop_na() |>
dplyr::select(
-Subregion
)
# check output
head(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## Region order order2 orderold mean_fst
## 1 Western Europe 9 1 1 0.07544418
## 2 Western Europe 10 2 2 0.07874215
## 3 Southern Europe 11 3 3 0.07110287
## 4 Southern Europe 13 5 5 0.09032277
## 5 Southern Europe 14 6 6 0.12206649
## 6 Southern Europe 15 7 7 0.10043809
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Region) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Region")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Region order order2 orderold mean_fst mean_fst_by_region
## 1 Western Europe 9 1 1 0.07544418 0.08
## 2 Western Europe 10 2 2 0.07874215 0.08
## 3 Southern Europe 11 3 3 0.07110287 0.10
## 4 Southern Europe 13 5 5 0.09032277 0.10
## 5 Southern Europe 14 6 6 0.12206649 0.10
## 6 Southern Europe 15 7 7 0.10043809 0.10
## 7 Southern Europe 16 8 8 0.11431214 0.10
## 8 Southern Europe 17 9 9 0.09158751 0.10
## 9 Southern Europe 18 10 10 0.09227643 0.10
## 10 Southern Europe 20 12 12 0.14626402 0.10
## 11 Southern Europe 21 13 13 0.13081478 0.10
## 12 Southern Europe 22 14 14 0.06377881 0.10
## 13 Southern Europe 23 15 15 0.10269616 0.10
## 14 Southern Europe 24 16 16 0.17817200 0.10
## 15 Southern Europe 25 17 17 0.08057195 0.10
## 16 Southern Europe 26 18 18 0.07113428 0.10
## 17 Southern Europe 27 19 19 0.08929935 0.10
## 18 Southern Europe 28 20 20 0.07221817 0.10
## 19 Southern Europe 29 21 21 0.06905326 0.10
## 20 Southern Europe 30 22 22 0.06755041 0.10
## 21 Southern Europe 31 23 23 0.08731382 0.10
## 22 Southern Europe 32 24 24 0.09104076 0.10
## 23 Southern Europe 33 25 25 0.11233310 0.10
## 24 Southern Europe 34 26 26 0.09919701 0.10
## 25 Eastern Europe 35 27 27 0.20349780 0.11
## 26 Southern Europe 36 28 28 0.08888572 0.10
## 27 Southern Europe 37 29 29 0.09286274 0.10
## 28 Eastern Europe 38 30 30 0.10748444 0.11
## 29 Eastern Europe 39 31 31 0.10234234 0.11
## 30 Eastern Europe 40 32 32 0.09220266 0.11
## 31 Eastern Europe 41 33 33 0.07821586 0.11
## 32 Eastern Europe 42 34 34 0.12956210 0.11
## 33 Eastern Europe 43 35 35 0.09680747 0.11
## 34 Eastern Europe 44 36 36 0.09497907 0.11
## 35 Eastern Europe 45 37 37 0.08506636 0.11
## 36 Eastern Europe 46 38 38 0.10803478 0.11
## 37 Eastern Europe 47 39 39 0.12845411 0.11
## 38 Eastern Europe 48 40 40 0.11637869 0.11
## 39 Eastern Europe 49 41 41 0.11376564 0.11
## 40 Eastern Europe 50 42 42 0.08040068 0.11
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Region order order2 orderold mean_fst mean_fst_by_region
## 1 Western Europe 9 1 1 0.07544418 0.08
## 2 Western Europe 10 2 2 0.07874215 0.08
## 3 Southern Europe 11 3 3 0.07110287 0.10
## 4 Southern Europe 13 5 5 0.09032277 0.10
## 5 Southern Europe 14 6 6 0.12206649 0.10
## 6 Southern Europe 15 7 7 0.10043809 0.10
## 7 Southern Europe 16 8 8 0.11431214 0.10
## 8 Southern Europe 17 9 9 0.09158751 0.10
## 9 Southern Europe 18 10 10 0.09227643 0.10
## 10 Southern Europe 20 12 12 0.14626402 0.10
## 11 Southern Europe 21 13 13 0.13081478 0.10
## 12 Southern Europe 22 14 14 0.06377881 0.10
## 13 Southern Europe 23 15 15 0.10269616 0.10
## 14 Southern Europe 24 16 16 0.17817200 0.10
## 15 Southern Europe 25 17 17 0.08057195 0.10
## 16 Southern Europe 26 18 18 0.07113428 0.10
## 17 Southern Europe 27 19 19 0.08929935 0.10
## 18 Southern Europe 28 20 20 0.07221817 0.10
## 19 Southern Europe 29 21 21 0.06905326 0.10
## 20 Southern Europe 30 22 22 0.06755041 0.10
## 21 Southern Europe 31 23 23 0.08731382 0.10
## 22 Southern Europe 32 24 24 0.09104076 0.10
## 23 Southern Europe 33 25 25 0.11233310 0.10
## 24 Southern Europe 34 26 26 0.09919701 0.10
## 25 Eastern Europe 35 27 27 0.20349780 0.11
## 26 Southern Europe 36 28 28 0.08888572 0.10
## 27 Southern Europe 37 29 29 0.09286274 0.10
## 28 Eastern Europe 38 30 30 0.10748444 0.11
## 29 Eastern Europe 39 31 31 0.10234234 0.11
## 30 Eastern Europe 40 32 32 0.09220266 0.11
## 31 Eastern Europe 41 33 33 0.07821586 0.11
## 32 Eastern Europe 42 34 34 0.12956210 0.11
## 33 Eastern Europe 43 35 35 0.09680747 0.11
## 34 Eastern Europe 44 36 36 0.09497907 0.11
## 35 Eastern Europe 45 37 37 0.08506636 0.11
## 36 Eastern Europe 46 38 38 0.10803478 0.11
## 37 Eastern Europe 47 39 39 0.12845411 0.11
## 38 Eastern Europe 48 40 40 0.11637869 0.11
## 39 Eastern Europe 49 41 41 0.11376564 0.11
## 40 Eastern Europe 50 42 42 0.08040068 0.11
## mean_fst_by_country
## 1 0.08
## 2 0.08
## 3 0.07
## 4 0.10
## 5 0.10
## 6 0.10
## 7 0.10
## 8 0.10
## 9 0.10
## 10 0.10
## 11 0.10
## 12 0.10
## 13 0.10
## 14 0.10
## 15 0.10
## 16 0.10
## 17 0.10
## 18 0.10
## 19 0.07
## 20 0.07
## 21 0.09
## 22 0.10
## 23 0.10
## 24 0.10
## 25 0.20
## 26 0.09
## 27 0.09
## 28 0.11
## 29 0.10
## 30 0.09
## 31 0.09
## 32 0.11
## 33 0.11
## 34 0.11
## 35 0.11
## 36 0.11
## 37 0.11
## 38 0.11
## 39 0.11
## 40 0.08
Mean by latitude
# Add a new column to indicate whether the latitude is above or below 30N
fst3 <- fst3 |>
mutate(Latitude_group = ifelse(Latitude >= 40, "Above 40N", "Below 40N"))
# Summarize the data by Latitude_group and calculate the mean_fst
summary_by_latitude <- fst3 |>
group_by(Latitude_group) |>
summarize(mean_fst_by_latitude = mean(mean_fst, na.rm = TRUE)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_latitude column to the fst3 tibble
fst3 <- fst3 |>
left_join(summary_by_latitude, by = "Latitude_group")
# Rename columns
fst3 <- fst3 |>
dplyr::rename(
City = Pop_City)
# Print the modified fst3 tibble
print(fst3)
## City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Region order order2 orderold mean_fst mean_fst_by_region
## 1 Western Europe 9 1 1 0.07544418 0.08
## 2 Western Europe 10 2 2 0.07874215 0.08
## 3 Southern Europe 11 3 3 0.07110287 0.10
## 4 Southern Europe 13 5 5 0.09032277 0.10
## 5 Southern Europe 14 6 6 0.12206649 0.10
## 6 Southern Europe 15 7 7 0.10043809 0.10
## 7 Southern Europe 16 8 8 0.11431214 0.10
## 8 Southern Europe 17 9 9 0.09158751 0.10
## 9 Southern Europe 18 10 10 0.09227643 0.10
## 10 Southern Europe 20 12 12 0.14626402 0.10
## 11 Southern Europe 21 13 13 0.13081478 0.10
## 12 Southern Europe 22 14 14 0.06377881 0.10
## 13 Southern Europe 23 15 15 0.10269616 0.10
## 14 Southern Europe 24 16 16 0.17817200 0.10
## 15 Southern Europe 25 17 17 0.08057195 0.10
## 16 Southern Europe 26 18 18 0.07113428 0.10
## 17 Southern Europe 27 19 19 0.08929935 0.10
## 18 Southern Europe 28 20 20 0.07221817 0.10
## 19 Southern Europe 29 21 21 0.06905326 0.10
## 20 Southern Europe 30 22 22 0.06755041 0.10
## 21 Southern Europe 31 23 23 0.08731382 0.10
## 22 Southern Europe 32 24 24 0.09104076 0.10
## 23 Southern Europe 33 25 25 0.11233310 0.10
## 24 Southern Europe 34 26 26 0.09919701 0.10
## 25 Eastern Europe 35 27 27 0.20349780 0.11
## 26 Southern Europe 36 28 28 0.08888572 0.10
## 27 Southern Europe 37 29 29 0.09286274 0.10
## 28 Eastern Europe 38 30 30 0.10748444 0.11
## 29 Eastern Europe 39 31 31 0.10234234 0.11
## 30 Eastern Europe 40 32 32 0.09220266 0.11
## 31 Eastern Europe 41 33 33 0.07821586 0.11
## 32 Eastern Europe 42 34 34 0.12956210 0.11
## 33 Eastern Europe 43 35 35 0.09680747 0.11
## 34 Eastern Europe 44 36 36 0.09497907 0.11
## 35 Eastern Europe 45 37 37 0.08506636 0.11
## 36 Eastern Europe 46 38 38 0.10803478 0.11
## 37 Eastern Europe 47 39 39 0.12845411 0.11
## 38 Eastern Europe 48 40 40 0.11637869 0.11
## 39 Eastern Europe 49 41 41 0.11376564 0.11
## 40 Eastern Europe 50 42 42 0.08040068 0.11
## mean_fst_by_country Latitude_group mean_fst_by_latitude
## 1 0.08 Above 40N 0.10257911
## 2 0.08 Above 40N 0.10257911
## 3 0.07 Above 40N 0.10257911
## 4 0.10 Below 40N 0.09296873
## 5 0.10 Below 40N 0.09296873
## 6 0.10 Below 40N 0.09296873
## 7 0.10 Above 40N 0.10257911
## 8 0.10 Below 40N 0.09296873
## 9 0.10 Above 40N 0.10257911
## 10 0.10 Above 40N 0.10257911
## 11 0.10 Above 40N 0.10257911
## 12 0.10 Above 40N 0.10257911
## 13 0.10 Above 40N 0.10257911
## 14 0.10 Above 40N 0.10257911
## 15 0.10 Above 40N 0.10257911
## 16 0.10 Above 40N 0.10257911
## 17 0.10 Below 40N 0.09296873
## 18 0.10 Above 40N 0.10257911
## 19 0.07 Below 40N 0.09296873
## 20 0.07 Above 40N 0.10257911
## 21 0.09 Above 40N 0.10257911
## 22 0.10 Above 40N 0.10257911
## 23 0.10 Above 40N 0.10257911
## 24 0.10 Above 40N 0.10257911
## 25 0.20 Above 40N 0.10257911
## 26 0.09 Below 40N 0.09296873
## 27 0.09 Below 40N 0.09296873
## 28 0.11 Above 40N 0.10257911
## 29 0.10 Above 40N 0.10257911
## 30 0.09 Below 40N 0.09296873
## 31 0.09 Above 40N 0.10257911
## 32 0.11 Above 40N 0.10257911
## 33 0.11 Above 40N 0.10257911
## 34 0.11 Above 40N 0.10257911
## 35 0.11 Above 40N 0.10257911
## 36 0.11 Above 40N 0.10257911
## 37 0.11 Above 40N 0.10257911
## 38 0.11 Above 40N 0.10257911
## 39 0.11 Above 40N 0.10257911
## 40 0.08 Above 40N 0.10257911
fst4 <- fst3 |>
dplyr::select(
Latitude_group, mean_fst_by_latitude, Region, mean_fst_by_region, Country, mean_fst_by_country, City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Latitude_group, Region, Country, City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## Latitude_group mean_fst_by_latitude Region mean_fst_by_region
## 1 Above 40N 0.1 Eastern Europe 0.11
## 2 Above 40N 0.1 Eastern Europe 0.11
## 3 Above 40N 0.1 Eastern Europe 0.11
## 4 Above 40N 0.1 Eastern Europe 0.11
## 5 Above 40N 0.1 Eastern Europe 0.11
## 6 Above 40N 0.1 Eastern Europe 0.11
## Country mean_fst_by_country City Abbreviation mean_fst
## 1 Armenia 0.08 Ijevan ARM 0.08
## 2 Bulgaria 0.10 Lom BUL 0.10
## 3 Georgia 0.11 Sakhumi, Abkhazia GES 0.11
## 4 Romania 0.11 Satu Mare ROS 0.11
## 5 Russia 0.11 Armavir RAR 0.12
## 6 Russia 0.11 Krasnodar KRA 0.09
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for Europe using SNP Set 3.",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Latitude_group | mean_fst_by_latitude | Region | mean_fst_by_region | Country | mean_fst_by_country | City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|---|---|
Above 40N | 0.10 | Eastern Europe | 0.11 | Armenia | 0.08 | Ijevan | ARM | 0.08 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Bulgaria | 0.10 | Lom | BUL | 0.10 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Georgia | 0.11 | Sakhumi, Abkhazia | GES | 0.11 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Romania | 0.11 | Satu Mare | ROS | 0.11 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Russia | 0.11 | Armavir | RAR | 0.12 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Russia | 0.11 | Krasnodar | KRA | 0.09 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Russia | 0.11 | Sochi | SOC | 0.11 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Russia | 0.11 | Tikhoretsk | TIK | 0.13 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Serbia | 0.20 | Novi Sad | SER | 0.20 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Turkey | 0.09 | Hopa | TUH | 0.08 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Ukraine | 0.11 | Alushta | ALU | 0.10 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Ukraine | 0.11 | Kerch, Crimea | KER | 0.09 |
Above 40N | 0.10 | Eastern Europe | 0.11 | Ukraine | 0.11 | Sevastopol, Crimea | SEV | 0.13 |
Above 40N | 0.10 | Southern Europe | 0.10 | Albania | 0.10 | Durres | ALD | 0.11 |
Above 40N | 0.10 | Southern Europe | 0.10 | Albania | 0.10 | Tirana | TIR | 0.10 |
Above 40N | 0.10 | Southern Europe | 0.10 | Albania | 0.10 | Vlore | ALV | 0.09 |
Above 40N | 0.10 | Southern Europe | 0.10 | Croatia | 0.09 | Dubrovnik | CRO | 0.09 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Bologna | ITB | 0.10 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Brescia | BRE | 0.15 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Cesena | CES | 0.18 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Desenzano | DES | 0.13 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Imperia | IMP | 0.09 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Puglia | ITP | 0.07 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Rome (Sapienza) | ROM | 0.08 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Rome (Trappola) | ITR | 0.07 |
Above 40N | 0.10 | Southern Europe | 0.10 | Italy | 0.10 | Trentino | TRE | 0.06 |
Above 40N | 0.10 | Southern Europe | 0.10 | Portugal | 0.07 | Penafiel | POP | 0.07 |
Above 40N | 0.10 | Southern Europe | 0.10 | Slovenia | 0.07 | Ajdovscina | SLO | 0.07 |
Above 40N | 0.10 | Southern Europe | 0.10 | Spain | 0.10 | Barcelona | BAR | 0.11 |
Above 40N | 0.10 | Western Europe | 0.08 | France | 0.08 | Saint-Martin-d'Heres | FRS | 0.08 |
Above 40N | 0.10 | Western Europe | 0.08 | France | 0.08 | Strasbourg | STS | 0.08 |
Below 40N | 0.09 | Eastern Europe | 0.11 | Turkey | 0.09 | Aliaga | TUA | 0.09 |
Below 40N | 0.09 | Southern Europe | 0.10 | Greece | 0.09 | Athens | GRA | 0.09 |
Below 40N | 0.09 | Southern Europe | 0.10 | Greece | 0.09 | Chania | GRC | 0.09 |
Below 40N | 0.09 | Southern Europe | 0.10 | Italy | 0.10 | Sicilia | SIC | 0.09 |
Below 40N | 0.09 | Southern Europe | 0.10 | Malta | 0.07 | Luqa | MAL | 0.07 |
Below 40N | 0.09 | Southern Europe | 0.10 | Spain | 0.10 | Badajoz | SPB | 0.09 |
Below 40N | 0.09 | Southern Europe | 0.10 | Spain | 0.10 | Catarroja | SPC | 0.10 |
Below 40N | 0.09 | Southern Europe | 0.10 | Spain | 0.10 | Magaluf | SPM | 0.09 |
Below 40N | 0.09 | Southern Europe | 0.10 | Spain | 0.10 | San Roque | SPS | 0.12 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here(
"output",
"europe",
"fst",
"fst_Europe_MAF1.docx"
)
# Save the Word document
print(doc, target = output_path)
Without latitude
Merge
fst3 <-
sampling_loc |>
left_join(
mean_fst,
by = c("Abbreviation" = "pop")
) |>
drop_na() |>
dplyr::select(
-Subregion
)
# check output
head(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## Region order order2 orderold mean_fst
## 1 Western Europe 9 1 1 0.07544418
## 2 Western Europe 10 2 2 0.07874215
## 3 Southern Europe 11 3 3 0.07110287
## 4 Southern Europe 13 5 5 0.09032277
## 5 Southern Europe 14 6 6 0.12206649
## 6 Southern Europe 15 7 7 0.10043809
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Region) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Region")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Region order order2 orderold mean_fst mean_fst_by_region
## 1 Western Europe 9 1 1 0.07544418 0.08
## 2 Western Europe 10 2 2 0.07874215 0.08
## 3 Southern Europe 11 3 3 0.07110287 0.10
## 4 Southern Europe 13 5 5 0.09032277 0.10
## 5 Southern Europe 14 6 6 0.12206649 0.10
## 6 Southern Europe 15 7 7 0.10043809 0.10
## 7 Southern Europe 16 8 8 0.11431214 0.10
## 8 Southern Europe 17 9 9 0.09158751 0.10
## 9 Southern Europe 18 10 10 0.09227643 0.10
## 10 Southern Europe 20 12 12 0.14626402 0.10
## 11 Southern Europe 21 13 13 0.13081478 0.10
## 12 Southern Europe 22 14 14 0.06377881 0.10
## 13 Southern Europe 23 15 15 0.10269616 0.10
## 14 Southern Europe 24 16 16 0.17817200 0.10
## 15 Southern Europe 25 17 17 0.08057195 0.10
## 16 Southern Europe 26 18 18 0.07113428 0.10
## 17 Southern Europe 27 19 19 0.08929935 0.10
## 18 Southern Europe 28 20 20 0.07221817 0.10
## 19 Southern Europe 29 21 21 0.06905326 0.10
## 20 Southern Europe 30 22 22 0.06755041 0.10
## 21 Southern Europe 31 23 23 0.08731382 0.10
## 22 Southern Europe 32 24 24 0.09104076 0.10
## 23 Southern Europe 33 25 25 0.11233310 0.10
## 24 Southern Europe 34 26 26 0.09919701 0.10
## 25 Eastern Europe 35 27 27 0.20349780 0.11
## 26 Southern Europe 36 28 28 0.08888572 0.10
## 27 Southern Europe 37 29 29 0.09286274 0.10
## 28 Eastern Europe 38 30 30 0.10748444 0.11
## 29 Eastern Europe 39 31 31 0.10234234 0.11
## 30 Eastern Europe 40 32 32 0.09220266 0.11
## 31 Eastern Europe 41 33 33 0.07821586 0.11
## 32 Eastern Europe 42 34 34 0.12956210 0.11
## 33 Eastern Europe 43 35 35 0.09680747 0.11
## 34 Eastern Europe 44 36 36 0.09497907 0.11
## 35 Eastern Europe 45 37 37 0.08506636 0.11
## 36 Eastern Europe 46 38 38 0.10803478 0.11
## 37 Eastern Europe 47 39 39 0.12845411 0.11
## 38 Eastern Europe 48 40 40 0.11637869 0.11
## 39 Eastern Europe 49 41 41 0.11376564 0.11
## 40 Eastern Europe 50 42 42 0.08040068 0.11
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 Europe FRS 2019
## 2 Strasbourg France 48.61124 7.754512 Europe STS 2019
## 3 Penafiel Portugal 41.18555 -8.329371 Europe POP 2017
## 4 Badajoz Spain 38.86622 -6.974194 Europe SPB 2018
## 5 San Roque Spain 36.17042 -5.371530 Europe SPS 2017
## 6 Catarroja Spain 39.40294 -0.395514 Europe SPC 2017
## 7 Barcelona Spain 41.38510 2.173400 Europe BAR 2018
## 8 Magaluf Spain 39.50679 2.530729 Europe SPM 2017
## 9 Imperia Italy 43.87159 8.003559 Europe IMP 2017
## 10 Brescia Italy 45.53373 10.204450 Europe BRE 1995
## 11 Desenzano Italy 45.46289 10.549140 Europe DES 1995
## 12 Trentino Italy 46.05917 11.117220 Europe TRE 2020
## 13 Bologna Italy 44.48478 11.366584 Europe ITB 2017
## 14 Cesena Italy 44.15287 12.244265 Europe CES 1995
## 15 Rome (Sapienza) Italy 41.90215 12.517399 Europe ROM 2017
## 16 Rome (Trappola) Italy 41.90215 12.517399 Europe ITR 2013
## 17 Sicilia Italy 38.23294 15.550877 Europe SIC 2016
## 18 Puglia Italy 41.12213 16.844107 Europe ITP 2016
## 19 Luqa Malta 35.86053 14.487028 Europe MAL 2019
## 20 Ajdovscina Slovenia 45.88715 13.770997 Europe SLO 2017
## 21 Dubrovnik Croatia 42.60654 18.226612 Europe CRO 2017
## 22 Vlore Albania 40.46600 19.489700 Europe ALV 2020
## 23 Durres Albania 41.29704 19.503734 Europe ALD 2018
## 24 Tirana Albania 41.31473 19.831716 Europe TIR 2017
## 25 Novi Sad Serbia 45.25887 19.818778 Europe SER 2019
## 26 Athens Greece 37.93719 23.946883 Europe GRA 2019
## 27 Chania Greece 35.51448 24.017960 Europe GRC 2019
## 28 Satu Mare Romania 47.79147 22.890202 Europe ROS 2020
## 29 Lom Bulgaria 43.80489 23.236340 Europe BUL 2019
## 30 Aliaga Turkey 38.76390 26.944800 Europe TUA 2019
## 31 Hopa Turkey 41.38760 41.437800 Europe TUH 2019
## 32 Sevastopol, Crimea Ukraine 44.54125 33.514005 Europe SEV 2021
## 33 Alushta Ukraine 44.68289 34.403681 Europe ALU 2021
## 34 Kerch, Crimea Ukraine 45.35246 36.470150 Europe KER 2021
## 35 Krasnodar Russia 44.95504 39.027817 Europe KRA 2017
## 36 Sochi Russia 43.60042 39.745328 Europe SOC 2021
## 37 Tikhoretsk Russia 45.85460 40.125600 Europe TIK 2021
## 38 Armavir Russia 44.96034 41.133057 Europe RAR 2021
## 39 Sakhumi, Abkhazia Georgia 43.07851 40.887588 Europe GES 2021
## 40 Ijevan Armenia 40.87971 45.147640 Europe ARM 2020
## Region order order2 orderold mean_fst mean_fst_by_region
## 1 Western Europe 9 1 1 0.07544418 0.08
## 2 Western Europe 10 2 2 0.07874215 0.08
## 3 Southern Europe 11 3 3 0.07110287 0.10
## 4 Southern Europe 13 5 5 0.09032277 0.10
## 5 Southern Europe 14 6 6 0.12206649 0.10
## 6 Southern Europe 15 7 7 0.10043809 0.10
## 7 Southern Europe 16 8 8 0.11431214 0.10
## 8 Southern Europe 17 9 9 0.09158751 0.10
## 9 Southern Europe 18 10 10 0.09227643 0.10
## 10 Southern Europe 20 12 12 0.14626402 0.10
## 11 Southern Europe 21 13 13 0.13081478 0.10
## 12 Southern Europe 22 14 14 0.06377881 0.10
## 13 Southern Europe 23 15 15 0.10269616 0.10
## 14 Southern Europe 24 16 16 0.17817200 0.10
## 15 Southern Europe 25 17 17 0.08057195 0.10
## 16 Southern Europe 26 18 18 0.07113428 0.10
## 17 Southern Europe 27 19 19 0.08929935 0.10
## 18 Southern Europe 28 20 20 0.07221817 0.10
## 19 Southern Europe 29 21 21 0.06905326 0.10
## 20 Southern Europe 30 22 22 0.06755041 0.10
## 21 Southern Europe 31 23 23 0.08731382 0.10
## 22 Southern Europe 32 24 24 0.09104076 0.10
## 23 Southern Europe 33 25 25 0.11233310 0.10
## 24 Southern Europe 34 26 26 0.09919701 0.10
## 25 Eastern Europe 35 27 27 0.20349780 0.11
## 26 Southern Europe 36 28 28 0.08888572 0.10
## 27 Southern Europe 37 29 29 0.09286274 0.10
## 28 Eastern Europe 38 30 30 0.10748444 0.11
## 29 Eastern Europe 39 31 31 0.10234234 0.11
## 30 Eastern Europe 40 32 32 0.09220266 0.11
## 31 Eastern Europe 41 33 33 0.07821586 0.11
## 32 Eastern Europe 42 34 34 0.12956210 0.11
## 33 Eastern Europe 43 35 35 0.09680747 0.11
## 34 Eastern Europe 44 36 36 0.09497907 0.11
## 35 Eastern Europe 45 37 37 0.08506636 0.11
## 36 Eastern Europe 46 38 38 0.10803478 0.11
## 37 Eastern Europe 47 39 39 0.12845411 0.11
## 38 Eastern Europe 48 40 40 0.11637869 0.11
## 39 Eastern Europe 49 41 41 0.11376564 0.11
## 40 Eastern Europe 50 42 42 0.08040068 0.11
## mean_fst_by_country
## 1 0.08
## 2 0.08
## 3 0.07
## 4 0.10
## 5 0.10
## 6 0.10
## 7 0.10
## 8 0.10
## 9 0.10
## 10 0.10
## 11 0.10
## 12 0.10
## 13 0.10
## 14 0.10
## 15 0.10
## 16 0.10
## 17 0.10
## 18 0.10
## 19 0.07
## 20 0.07
## 21 0.09
## 22 0.10
## 23 0.10
## 24 0.10
## 25 0.20
## 26 0.09
## 27 0.09
## 28 0.11
## 29 0.10
## 30 0.09
## 31 0.09
## 32 0.11
## 33 0.11
## 34 0.11
## 35 0.11
## 36 0.11
## 37 0.11
## 38 0.11
## 39 0.11
## 40 0.08
fst3 <- fst3 |>
dplyr::rename(
City = Pop_City)
fst4 <- fst3 |>
dplyr::select(
Region, mean_fst_by_region, Country, mean_fst_by_country, City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Region, Country, City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## Region mean_fst_by_region Country mean_fst_by_country
## 1 Eastern Europe 0.11 Armenia 0.08
## 2 Eastern Europe 0.11 Bulgaria 0.10
## 3 Eastern Europe 0.11 Georgia 0.11
## 4 Eastern Europe 0.11 Romania 0.11
## 5 Eastern Europe 0.11 Russia 0.11
## 6 Eastern Europe 0.11 Russia 0.11
## City Abbreviation mean_fst
## 1 Ijevan ARM 0.08
## 2 Lom BUL 0.10
## 3 Sakhumi, Abkhazia GES 0.11
## 4 Satu Mare ROS 0.11
## 5 Armavir RAR 0.12
## 6 Krasnodar KRA 0.09
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for Europe using SNP Set 3.",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Region | mean_fst_by_region | Country | mean_fst_by_country | City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|
Eastern Europe | 0.11 | Armenia | 0.08 | Ijevan | ARM | 0.08 |
Eastern Europe | 0.11 | Bulgaria | 0.10 | Lom | BUL | 0.10 |
Eastern Europe | 0.11 | Georgia | 0.11 | Sakhumi, Abkhazia | GES | 0.11 |
Eastern Europe | 0.11 | Romania | 0.11 | Satu Mare | ROS | 0.11 |
Eastern Europe | 0.11 | Russia | 0.11 | Armavir | RAR | 0.12 |
Eastern Europe | 0.11 | Russia | 0.11 | Krasnodar | KRA | 0.09 |
Eastern Europe | 0.11 | Russia | 0.11 | Sochi | SOC | 0.11 |
Eastern Europe | 0.11 | Russia | 0.11 | Tikhoretsk | TIK | 0.13 |
Eastern Europe | 0.11 | Serbia | 0.20 | Novi Sad | SER | 0.20 |
Eastern Europe | 0.11 | Turkey | 0.09 | Aliaga | TUA | 0.09 |
Eastern Europe | 0.11 | Turkey | 0.09 | Hopa | TUH | 0.08 |
Eastern Europe | 0.11 | Ukraine | 0.11 | Alushta | ALU | 0.10 |
Eastern Europe | 0.11 | Ukraine | 0.11 | Kerch, Crimea | KER | 0.09 |
Eastern Europe | 0.11 | Ukraine | 0.11 | Sevastopol, Crimea | SEV | 0.13 |
Southern Europe | 0.10 | Albania | 0.10 | Durres | ALD | 0.11 |
Southern Europe | 0.10 | Albania | 0.10 | Tirana | TIR | 0.10 |
Southern Europe | 0.10 | Albania | 0.10 | Vlore | ALV | 0.09 |
Southern Europe | 0.10 | Croatia | 0.09 | Dubrovnik | CRO | 0.09 |
Southern Europe | 0.10 | Greece | 0.09 | Athens | GRA | 0.09 |
Southern Europe | 0.10 | Greece | 0.09 | Chania | GRC | 0.09 |
Southern Europe | 0.10 | Italy | 0.10 | Bologna | ITB | 0.10 |
Southern Europe | 0.10 | Italy | 0.10 | Brescia | BRE | 0.15 |
Southern Europe | 0.10 | Italy | 0.10 | Cesena | CES | 0.18 |
Southern Europe | 0.10 | Italy | 0.10 | Desenzano | DES | 0.13 |
Southern Europe | 0.10 | Italy | 0.10 | Imperia | IMP | 0.09 |
Southern Europe | 0.10 | Italy | 0.10 | Puglia | ITP | 0.07 |
Southern Europe | 0.10 | Italy | 0.10 | Rome (Sapienza) | ROM | 0.08 |
Southern Europe | 0.10 | Italy | 0.10 | Rome (Trappola) | ITR | 0.07 |
Southern Europe | 0.10 | Italy | 0.10 | Sicilia | SIC | 0.09 |
Southern Europe | 0.10 | Italy | 0.10 | Trentino | TRE | 0.06 |
Southern Europe | 0.10 | Malta | 0.07 | Luqa | MAL | 0.07 |
Southern Europe | 0.10 | Portugal | 0.07 | Penafiel | POP | 0.07 |
Southern Europe | 0.10 | Slovenia | 0.07 | Ajdovscina | SLO | 0.07 |
Southern Europe | 0.10 | Spain | 0.10 | Badajoz | SPB | 0.09 |
Southern Europe | 0.10 | Spain | 0.10 | Barcelona | BAR | 0.11 |
Southern Europe | 0.10 | Spain | 0.10 | Catarroja | SPC | 0.10 |
Southern Europe | 0.10 | Spain | 0.10 | Magaluf | SPM | 0.09 |
Southern Europe | 0.10 | Spain | 0.10 | San Roque | SPS | 0.12 |
Western Europe | 0.08 | France | 0.08 | Saint-Martin-d'Heres | FRS | 0.08 |
Western Europe | 0.08 | France | 0.08 | Strasbourg | STS | 0.08 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here(
"output",
"europe",
"fst",
"fst_Europe_MAF1_no_lat.docx"
)
# Save the Word document
print(doc, target = output_path)
To make scatter plot
# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
dplyr::group_by(Country) |>
dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))
# save the data
saveRDS(aggregated_data, here(
"output", "europe", "fst", "fst_country_MAF1.rds"
))
# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]
# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)
# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)
# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)
ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
geom_point(aes(color = Country), size = 3) +
geom_line(aes(y = fitted_values), color = "blue") + # Fitted line
labs(
title = "Mean Fst by Country",
x = "Ordered Countries",
y = "Mean Fst Value"
) +
scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "none")
Estimate distances
# Grab the population names from the matrix aa
populations_with_fst <- colnames(aa)
# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)
# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation
# Calculate the distances
for (i in 1:n) {
for (j in 1:n) {
if (i != j) {
coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
}
}
}
# Print the distance matrix
head(distance_matrix)
## FRS STS POP SPB SPS SPC BAR
## FRS 0.0000 412.1522 1225.4383 1263.7518 1371.4448 817.6702 511.8716
## STS 412.1522 0.0000 1509.1745 1601.1573 1750.4874 1213.5639 915.9614
## POP 1225.4383 1509.1745 0.0000 282.8409 614.5101 701.9619 878.2910
## SPB 1263.7518 1601.1573 282.8409 0.0000 331.7685 571.0509 827.0716
## SPS 1371.4448 1750.4874 614.5101 331.7685 0.0000 566.5153 874.4050
## SPC 817.6702 1213.5639 701.9619 571.0509 566.5153 0.0000 310.0074
## SPM IMP BRE DES TRE ITB CES
## FRS 683.8845 228.2822 349.1505 375.3876 427.8791 448.0871 524.5893
## STS 1095.6072 527.9620 389.6504 409.5459 380.8019 536.0256 604.0707
## POP 939.4811 1370.3898 1572.0008 1596.9006 1653.2807 1644.8695 1711.3013
## SPB 822.8201 1367.0861 1594.7230 1616.9360 1682.2286 1643.6694 1701.8380
## SPS 787.3372 1423.5497 1670.1969 1688.5822 1761.4648 1690.3169 1736.8728
## SPC 251.7726 857.0395 1104.4760 1122.4598 1195.9602 1124.6677 1173.2270
## ROM ITR SIC ITP MAL SLO CRO ALV
## FRS 654.1062 654.1062 1119.260 1004.474 1269.904 628.7258 1037.845 1234.162
## STS 834.5057 834.5057 1314.450 1098.126 1522.503 546.2343 1052.803 1296.908
## POP 1734.4434 1734.4434 2064.584 2102.566 2066.730 1851.8481 2197.100 2334.720
## SPB 1683.0686 1683.0686 1957.316 2040.301 1923.429 1869.2123 2157.626 2266.196
## SPS 1669.1413 1669.1413 1865.247 2002.327 1785.308 1929.6259 2144.409 2215.487
## SPC 1124.2692 1124.2692 1387.383 1474.434 1368.139 1363.2153 1600.981 1697.872
## ALD TIR SER GRA GRC ROS BUL TUA
## FRS 1192.210 1216.107 1100.2591 1709.174 1877.379 1341.427 1392.500 1884.818
## STS 1230.239 1248.235 988.6774 1763.684 1974.712 1124.852 1304.105 1885.544
## POP 2319.742 2346.521 2315.5228 2777.366 2876.039 2566.256 2591.385 3000.557
## SPB 2262.516 2289.859 2313.3538 2686.566 2760.206 2594.045 2569.099 2924.845
## SPS 2225.649 2253.121 2340.8167 2601.355 2642.916 2651.131 2568.416 2854.982
## SPC 1697.515 1724.957 1780.6744 2115.539 2193.176 2085.530 2019.617 2354.477
## TUH SEV ALU KER KRA SOC TIK RAR
## FRS 2897.030 2179.621 2245.512 2390.859 2596.424 2688.509 2660.271 2758.005
## STS 2745.357 2012.146 2071.666 2197.341 2400.939 2511.584 2447.357 2555.484
## POP 4103.736 3397.015 3464.308 3613.507 3818.891 3906.072 3885.080 3981.365
## SPB 4076.402 3387.944 3457.259 3614.231 3818.203 3894.554 3893.085 3982.756
## SPS 4053.790 3392.537 3463.561 3628.220 3828.858 3891.912 3913.323 3994.595
## SPC 3518.971 2842.701 2913.061 3074.730 3276.925 3346.038 3357.781 3442.342
## GES ARM
## FRS 2793.592 3207.499
## STS 2619.862 3045.177
## POP 4009.674 4417.211
## SPB 3995.220 4391.989
## SPS 3988.376 4369.793
## SPC 3444.704 3834.995
Compare distance and FST
# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]
# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0
# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))
data <- data |>
dplyr::arrange(
Distance
)
head(data)
## Distance FST row_index col_index
## 1 0 0 FRS FRS
## 2 0 0 STS STS
## 3 0 0 POP POP
## 4 0 0 SPB SPB
## 5 0 0 SPS SPS
## 6 0 0 SPC SPC
Fit linear regression
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
Select countries with at least 3 sampling localities in Europe
countries_with_3_pops <- filtered_sampling_loc %>%
group_by(Country) %>%
filter(n() >= 3) %>%
pull(Country) %>%
unique()
countries_with_3_pops
## [1] "Spain" "Italy" "Albania" "Ukraine" "Russia"
Do test for each country
results <- list()
for (country in countries_with_3_pops) {
# Extract abbreviations for the country
abbreviations <- filtered_sampling_loc %>%
filter(Country == country) %>%
pull(Abbreviation)
# Subset the data
subset_data <- data %>%
filter(row_index %in% abbreviations & col_index %in% abbreviations)
subset_data <- subset_data[subset_data$Distance > 0, ]
# Perform linear regression
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = subset_data)
results[[country]] <- list(
equation = sprintf("y = %.5fx + %.3f", coef(lm_model)[2], coef(lm_model)[1]),
r2 = sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
)
}
results
## $Spain
## $Spain$equation
## [1] "y = -0.01500x + 0.191"
##
## $Spain$r2
## [1] "R^2 = 0.02"
##
##
## $Italy
## $Italy$equation
## [1] "y = -0.01389x + 0.190"
##
## $Italy$r2
## [1] "R^2 = 0.03"
##
##
## $Albania
## $Albania$equation
## [1] "y = -0.01524x + 0.080"
##
## $Albania$r2
## [1] "R^2 = 0.49"
##
##
## $Ukraine
## $Ukraine$equation
## [1] "y = -0.02810x + 0.223"
##
## $Ukraine$r2
## [1] "R^2 = 0.90"
##
##
## $Russia
## $Russia$equation
## [1] "y = 0.01290x + -0.034"
##
## $Russia$r2
## [1] "R^2 = 0.02"
Merge the data
data_merged <- data %>%
left_join(filtered_sampling_loc[, c("Pop_City", "Country", "Abbreviation")], by = c("row_index" = "Abbreviation")) %>%
rename(Country1 = Country) %>%
left_join(filtered_sampling_loc[, c("Pop_City", "Country", "Abbreviation")], by = c("col_index" = "Abbreviation")) %>%
dplyr::select(-Pop_City.x, -Pop_City.y) %>%
filter(Country1 == Country) # Ensures the data is within the same country
# Filter to get the coutries with 3 or more sampling localities
countries_to_include <- c("Spain", "Italy", "Albania", "Ukraine", "Russia")
# Filter
data_filtered <- data_merged %>%
group_by(Country1) %>%
filter(n() >= 3 & Country1 %in% countries_to_include) %>%
ungroup()
Calculate linear regression for each country
regression_results <- data_filtered %>%
group_by(Country1) %>%
do(model = lm(FST/(1-FST) ~ log(Distance), data = .)) %>%
rowwise() %>%
mutate(equation = sprintf("italic(y) == %.3f * italic(x) + %.3f", coef(model)[2], coef(model)[1]),
r2 = sprintf("italic(R)^2 == %.2f", summary(model)$r.squared))
Plot it
ggplot(data_filtered, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ Country1, scales = "free", ncol = 2) +
geom_text(
data = regression_results,
aes(label = paste(equation, r2, sep = "~~")),
x = Inf, y = Inf,
vjust = 2, hjust = 1.2,
size = 3.5,
parse = TRUE,
inherit.aes = FALSE
) +
labs(title = "Fst vs Distance by Country",
x = "Log(Distance)",
y = "Fst(1-Fst)") +
scale_x_continuous(labels = scales::comma) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("output", "europe", "fst", "fst_by_distance_countries_Europe_MAF1.pdf"),
width = 6,
height = 8,
units = "in"
)
We can merge the FST and distance matrices
# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)
# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]
# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names
# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
# Upper triangular - Fst values with two decimal places
merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
} else if (i > j) {
# Lower triangular - Distance values with zero decimal places
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Now the merged_matrix should be formatted as you need
print(merged_matrix)
## ALD ALU ALV ARM BAR BRE BUL CES CRO DES
## ALD NA "0.12" "0.02" "0.11" "0.12" "0.17" "0.11" "0.20" "0.05" "0.16"
## ALU "1268" NA "0.10" "0.06" "0.11" "0.16" "0.10" "0.19" "0.09" "0.15"
## ALV "93" "1307" NA "0.09" "0.10" "0.16" "0.09" "0.18" "0.03" "0.14"
## ARM "2144" "974" "2159" NA "0.11" "0.13" "0.10" "0.16" "0.08" "0.11"
## BAR "1446" "2631" "1458" "3566" NA "0.18" "0.11" "0.21" "0.09" "0.17"
## BRE "887" "1896" "942" "2859" "796" NA "0.15" "0.16" "0.15" "0.09"
## BUL "414" "895" "483" "1826" "1742" "1048" NA "0.19" "0.09" "0.14"
## CES "673" "1757" "724" "2706" "878" "223" "881" NA "0.18" "0.14"
## CRO "180" "1321" "260" "2235" "1333" "719" "428" "514" NA "0.13"
## DES "859" "1870" "915" "2832" "815" "28" "1021" "198" "691" NA
## FRS "1192" "2246" "1234" "3207" "512" "349" "1393" "525" "1038" "375"
## GES "1770" "550" "1795" "429" "3168" "2444" "1426" "2300" "1845" "2418"
## GRA "534" "1151" "476" "1848" "1900" "1417" "656" "1200" "711" "1390"
## GRC "754" "1348" "679" "1937" "2007" "1611" "925" "1389" "934" "1585"
## IMP "984" "2097" "1020" "3047" "552" "254" "1221" "341" "840" "268"
## ITB "752" "1821" "802" "2773" "824" "148" "950" "79" "591" "126"
## ITP "224" "1482" "235" "2367" "1227" "728" "604" "506" "201" "702"
## ITR "585" "1794" "605" "2711" "862" "445" "899" "252" "477" "427"
## KER "1443" "179" "1484" "862" "2789" "2042" "1062" "1913" "1490" "2017"
## KRA "1633" "366" "1670" "674" "2992" "2248" "1261" "2117" "1687" "2222"
## MAL "746" "1948" "674" "2719" "1233" "1135" "1157" "943" "817" "1119"
## POP "2320" "3464" "2335" "4417" "878" "1572" "2591" "1711" "2197" "1597"
## RAR "1798" "532" "1833" "560" "3157" "2410" "1427" "2281" "1853" "2385"
## ROM "585" "1794" "605" "2711" "862" "445" "899" "252" "477" "427"
## ROS "771" "950" "859" "1923" "1783" "1000" "445" "917" "683" "977"
## SER "442" "1149" "534" "2107" "1489" "752" "316" "612" "322" "725"
## SEV "1196" "72" "1235" "1034" "2561" "1830" "824" "1689" "1250" "1804"
## SIC "480" "1724" "420" "2545" "1195" "925" "894" "715" "537" "905"
## SLO "689" "1617" "761" "2581" "1059" "280" "782" "227" "509" "255"
## SOC "1678" "443" "1706" "538" "3068" "2339" "1327" "2197" "1748" "2313"
## SPB "2263" "3457" "2266" "4392" "827" "1595" "2569" "1702" "2158" "1617"
## SPC "1698" "2913" "1698" "3835" "310" "1104" "2020" "1173" "1601" "1122"
## SPM "1450" "2677" "1448" "3591" "211" "919" "1782" "956" "1360" "933"
## SPS "2226" "3464" "2215" "4370" "874" "1670" "2568" "1737" "2144" "1689"
## STS "1230" "2072" "1297" "3045" "916" "390" "1304" "604" "1053" "410"
## TIK "1733" "467" "1774" "687" "3069" "2313" "1350" "2191" "1778" "2288"
## TIR "27" "1242" "99" "2117" "1473" "908" "393" "696" "196" "881"
## TRE "858" "1821" "920" "2787" "887" "92" "986" "230" "684" "80"
## TUA "694" "904" "667" "1571" "2123" "1569" "641" "1363" "851" "1542"
## TUH "1828" "679" "1844" "316" "3251" "2549" "1512" "2393" "1919" "2522"
## FRS GES GRA GRC IMP ITB ITP ITR KER KRA
## ALD "0.09" "0.14" "0.08" "0.07" "0.11" "0.12" "0.08" "0.08" "0.12" "0.11"
## ALU "0.08" "0.09" "0.08" "0.09" "0.09" "0.10" "0.08" "0.07" "0.07" "0.05"
## ALV "0.07" "0.12" "0.06" "0.05" "0.08" "0.10" "0.06" "0.06" "0.10" "0.09"
## ARM "0.06" "0.06" "0.08" "0.08" "0.08" "0.09" "0.07" "0.07" "0.04" "0.02"
## BAR "0.09" "0.14" "0.09" "0.10" "0.11" "0.13" "0.08" "0.07" "0.12" "0.11"
## BRE "0.12" "0.18" "0.15" "0.15" "0.13" "0.12" "0.13" "0.14" "0.15" "0.15"
## BUL "0.07" "0.13" "0.09" "0.09" "0.09" "0.10" "0.06" "0.05" "0.11" "0.10"
## CES "0.15" "0.21" "0.18" "0.19" "0.18" "0.17" "0.16" "0.17" "0.18" "0.18"
## CRO "0.06" "0.12" "0.06" "0.05" "0.08" "0.09" "0.05" "0.05" "0.10" "0.09"
## DES "0.11" "0.17" "0.14" "0.14" "0.12" "0.11" "0.11" "0.13" "0.14" "0.14"
## FRS NA "0.11" "0.07" "0.07" "0.05" "0.06" "0.04" "0.04" "0.08" "0.08"
## GES "2794" NA "0.11" "0.12" "0.13" "0.14" "0.11" "0.10" "0.05" "0.02"
## GRA "1709" "1540" NA "0.02" "0.08" "0.10" "0.05" "0.05" "0.10" "0.08"
## GRC "1877" "1675" "270" NA "0.09" "0.10" "0.06" "0.06" "0.10" "0.09"
## IMP "228" "2640" "1492" "1653" NA "0.08" "0.05" "0.05" "0.09" "0.09"
## ITB "448" "2365" "1279" "1467" "277" NA "0.07" "0.07" "0.11" "0.11"
## ITP "1004" "1991" "705" "884" "787" "583" NA "0.02" "0.08" "0.07"
## ITR "654" "2321" "1070" "1224" "428" "302" "371" NA "0.08" "0.07"
## KER "2391" "434" "1327" "1517" "2250" "1973" "1655" "1960" NA "0.03"
## KRA "2596" "257" "1478" "1648" "2455" "2178" "1848" "2160" "206" NA
## MAL "1270" "2395" "873" "862" "1049" "996" "621" "694" "2127" "2301"
## POP "1225" "4010" "2777" "2876" "1370" "1645" "2103" "1734" "3614" "3819"
## RAR "2758" "210" "1629" "1789" "2619" "2342" "2014" "2326" "369" "166"
## ROM "654" "2321" "1070" "1224" "428" "302" "371" "0" "1960" "2160"
## ROS "1341" "1496" "1100" "1370" "1232" "961" "884" "1047" "1073" "1276"
## SER "1100" "1695" "884" "1141" "949" "672" "520" "697" "1301" "1506"
## SEV "2180" "614" "1086" "1288" "2029" "1753" "1410" "1723" "250" "438"
## SIC "1119" "2197" "736" "812" "891" "779" "340" "483" "1901" "2084"
## SLO "629" "2166" "1220" "1440" "507" "245" "585" "455" "1762" "1968"
## SOC "2689" "109" "1470" "1618" "2537" "2262" "1898" "2224" "325" "161"
## SPB "1264" "3995" "2687" "2760" "1367" "1644" "2040" "1683" "3614" "3818"
## SPC "818" "3445" "2116" "2193" "857" "1125" "1474" "1124" "3075" "3277"
## SPM "684" "3204" "1864" "1944" "665" "916" "1227" "883" "2841" "3042"
## SPS "1371" "3988" "2601" "2643" "1424" "1690" "2002" "1669" "3628" "3829"
## STS "412" "2620" "1764" "1975" "528" "536" "1098" "835" "2197" "2401"
## TIK "2660" "315" "1600" "1776" "2527" "2250" "1945" "2247" "290" "132"
## TIR "1216" "1743" "515" "742" "1009" "775" "251" "612" "1417" "1607"
## TRE "428" "2370" "1391" "1596" "346" "176" "717" "476" "1964" "2170"
## TUA "1885" "1265" "277" "445" "1678" "1441" "901" "1271" "1075" "1214"
## TUH "2897" "194" "1544" "1650" "2734" "2461" "2051" "2395" "597" "443"
## MAL POP RAR ROM ROS SER SEV SIC SLO SOC
## ALD "0.08" "0.09" "0.15" "0.10" "0.12" "0.22" "0.16" "0.11" "0.08" "0.14"
## ALU "0.07" "0.08" "0.08" "0.08" "0.11" "0.20" "0.09" "0.10" "0.07" "0.07"
## ALV "0.06" "0.07" "0.13" "0.07" "0.10" "0.18" "0.14" "0.09" "0.06" "0.12"
## ARM "0.06" "0.05" "0.05" "0.08" "0.09" "0.19" "0.07" "0.08" "0.05" "0.05"
## BAR "0.08" "0.09" "0.14" "0.08" "0.12" "0.22" "0.16" "0.10" "0.08" "0.14"
## BRE "0.12" "0.11" "0.19" "0.16" "0.14" "0.25" "0.20" "0.12" "0.10" "0.18"
## BUL "0.06" "0.07" "0.14" "0.07" "0.10" "0.20" "0.15" "0.08" "0.07" "0.13"
## CES "0.15" "0.13" "0.22" "0.19" "0.17" "0.28" "0.23" "0.16" "0.14" "0.21"
## CRO "0.06" "0.06" "0.12" "0.05" "0.10" "0.17" "0.14" "0.08" "0.06" "0.11"
## DES "0.11" "0.08" "0.18" "0.14" "0.11" "0.23" "0.18" "0.09" "0.08" "0.17"
## FRS "0.04" "0.04" "0.11" "0.04" "0.07" "0.16" "0.12" "0.06" "0.04" "0.10"
## GES "0.10" "0.10" "0.03" "0.11" "0.14" "0.23" "0.05" "0.12" "0.10" "0.01"
## GRA "0.05" "0.07" "0.12" "0.05" "0.10" "0.19" "0.13" "0.08" "0.05" "0.11"
## GRC "0.06" "0.07" "0.12" "0.06" "0.10" "0.19" "0.13" "0.08" "0.06" "0.12"
## IMP "0.04" "0.05" "0.13" "0.06" "0.09" "0.23" "0.15" "0.07" "0.05" "0.12"
## ITB "0.06" "0.05" "0.14" "0.08" "0.10" "0.22" "0.16" "0.08" "0.06" "0.13"
## ITP "0.02" "0.04" "0.11" "0.02" "0.08" "0.17" "0.12" "0.04" "0.04" "0.10"
## ITR "0.02" "0.03" "0.10" "0.01" "0.08" "0.16" "0.12" "0.05" "0.04" "0.10"
## KER "0.08" "0.07" "0.05" "0.09" "0.11" "0.20" "0.07" "0.10" "0.07" "0.05"
## KRA "0.07" "0.07" "0.01" "0.08" "0.11" "0.20" "0.03" "0.09" "0.06" "0.01"
## MAL NA "0.03" "0.11" "0.02" "0.07" "0.16" "0.12" "0.05" "0.04" "0.09"
## POP "2067" NA "0.11" "0.04" "0.07" "0.17" "0.12" "0.05" "0.04" "0.10"
## RAR "2460" "3981" NA "0.12" "0.14" "0.24" "0.05" "0.13" "0.10" "0.02"
## ROM "694" "1734" "2326" NA "0.09" "0.21" "0.13" "0.06" "0.04" "0.11"
## ROS "1498" "2566" "1432" "1047" NA "0.20" "0.15" "0.09" "0.06" "0.13"
## SER "1139" "2316" "1670" "697" "367" NA "0.25" "0.19" "0.15" "0.23"
## SEV "1878" "3397" "604" "1723" "894" "1082" NA "0.14" "0.11" "0.04"
## SIC "280" "2065" "2246" "483" "1219" "858" "1652" NA "0.05" "0.12"
## SLO "1118" "1852" "2130" "455" "726" "476" "1551" "865" NA "0.09"
## SOC "2316" "3906" "187" "2224" "1388" "1591" "509" "2113" "2060" NA
## SPB "1923" "283" "3983" "1683" "2594" "2313" "3388" "1957" "1869" "3895"
## SPC "1368" "702" "3442" "1124" "2086" "1781" "2843" "1387" "1363" "3346"
## SPM "1128" "939" "3208" "883" "1873" "1555" "2606" "1136" "1160" "3107"
## SPS "1785" "615" "3995" "1669" "2651" "2341" "3393" "1865" "1930" "3892"
## STS "1523" "1509" "2555" "835" "1125" "989" "2012" "1314" "546" "2512"
## TIK "2414" "3885" "127" "2247" "1328" "1580" "539" "2190" "2033" "253"
## TIR "764" "2347" "1772" "612" "761" "439" "1170" "502" "705" "1651"
## TRE "1170" "1653" "2331" "476" "915" "683" "1756" "945" "206" "2264"
## TUA "1148" "3001" "1361" "1271" "1057" "932" "843" "994" "1340" "1198"
## TUH "2412" "4104" "398" "2395" "1629" "1797" "734" "2233" "2272" "283"
## SPB SPC SPM SPS STS TIK TIR TRE TUA TUH
## ALD "0.12" "0.13" "0.11" "0.14" "0.10" "0.16" "0.03" "0.08" "0.10" "0.09"
## ALU "0.10" "0.11" "0.10" "0.13" "0.08" "0.09" "0.10" "0.07" "0.09" "0.08"
## ALV "0.10" "0.11" "0.10" "0.11" "0.07" "0.14" "-0.00" "0.06" "0.08" "0.07"
## ARM "0.06" "0.07" "0.07" "0.12" "0.07" "0.07" "0.09" "0.05" "0.09" "0.07"
## BAR "0.12" "0.13" "0.11" "0.11" "0.09" "0.15" "0.10" "0.08" "0.06" "0.04"
## BRE "0.10" "0.12" "0.12" "0.18" "0.12" "0.19" "0.17" "0.09" "0.16" "0.13"
## BUL "0.10" "0.10" "0.10" "0.12" "0.08" "0.15" "0.10" "0.06" "0.08" "0.07"
## CES "0.13" "0.15" "0.15" "0.22" "0.15" "0.22" "0.21" "0.12" "0.19" "0.16"
## CRO "0.09" "0.10" "0.09" "0.11" "0.06" "0.13" "0.02" "0.05" "0.07" "0.07"
## DES "0.08" "0.10" "0.10" "0.17" "0.11" "0.18" "0.15" "0.07" "0.14" "0.12"
## FRS "0.06" "0.07" "0.06" "0.10" "0.03" "0.12" "0.07" "0.03" "0.07" "0.05"
## GES "0.12" "0.13" "0.13" "0.16" "0.11" "0.05" "0.14" "0.10" "0.12" "0.11"
## GRA "0.09" "0.10" "0.09" "0.10" "0.07" "0.13" "0.06" "0.06" "0.07" "0.06"
## GRC "0.09" "0.11" "0.10" "0.11" "0.07" "0.13" "0.06" "0.06" "0.08" "0.07"
## IMP "0.07" "0.08" "0.08" "0.10" "0.05" "0.15" "0.10" "0.04" "0.08" "0.06"
## ITB "0.08" "0.09" "0.09" "0.13" "0.07" "0.16" "0.11" "0.04" "0.10" "0.08"
## ITP "0.07" "0.08" "0.06" "0.09" "0.04" "0.12" "0.06" "0.03" "0.06" "0.04"
## ITR "0.08" "0.09" "0.07" "0.07" "0.04" "0.12" "0.06" "0.04" "0.04" "0.03"
## KER "0.09" "0.10" "0.10" "0.13" "0.08" "0.07" "0.11" "0.07" "0.10" "0.09"
## KRA "0.09" "0.10" "0.09" "0.13" "0.08" "0.04" "0.10" "0.07" "0.09" "0.08"
## MAL "0.07" "0.07" "0.06" "0.07" "0.04" "0.11" "0.07" "0.03" "0.05" "0.04"
## POP "0.05" "0.05" "0.05" "0.09" "0.04" "0.12" "0.07" "0.03" "0.06" "0.05"
## RAR "0.13" "0.14" "0.13" "0.16" "0.11" "0.05" "0.14" "0.11" "0.12" "0.11"
## ROM "0.08" "0.10" "0.08" "0.09" "0.04" "0.13" "0.07" "0.03" "0.05" "0.04"
## ROS "0.09" "0.10" "0.09" "0.13" "0.08" "0.15" "0.12" "0.05" "0.10" "0.08"
## SER "0.20" "0.22" "0.21" "0.24" "0.18" "0.25" "0.22" "0.15" "0.20" "0.17"
## SEV "0.14" "0.15" "0.15" "0.17" "0.13" "0.08" "0.15" "0.12" "0.14" "0.12"
## SIC "0.08" "0.09" "0.07" "0.11" "0.06" "0.14" "0.09" "0.04" "0.08" "0.06"
## SLO "0.05" "0.06" "0.05" "0.09" "0.04" "0.11" "0.06" "0.02" "0.06" "0.04"
## SOC "0.12" "0.13" "0.12" "0.15" "0.11" "0.06" "0.13" "0.10" "0.12" "0.10"
## SPB NA "0.01" "0.01" "0.13" "0.06" "0.14" "0.10" "0.04" "0.10" "0.08"
## SPC "571" NA "0.03" "0.14" "0.07" "0.15" "0.11" "0.05" "0.11" "0.08"
## SPM "823" "252" NA "0.08" "0.07" "0.14" "0.11" "0.04" "0.09" "0.07"
## SPS "332" "567" "787" NA "0.10" "0.17" "0.13" "0.08" "0.03" "0.09"
## STS "1601" "1214" "1096" "1750" NA "0.12" "0.08" "0.03" "0.07" "0.05"
## TIK "3893" "3358" "3126" "3913" "2447" NA "0.15" "0.11" "0.14" "0.12"
## TIR "2290" "1725" "1478" "2253" "1248" "1707" NA "0.06" "0.08" "0.07"
## TRE "1682" "1196" "1011" "1761" "381" "2233" "877" NA "0.06" "0.04"
## TUA "2925" "2354" "2103" "2855" "1886" "1339" "669" "1529" NA "0.05"
## TUH "4076" "3519" "3275" "4054" "2745" "508" "1801" "2478" "1267" NA
cities <- readRDS(here("output", "sampling_loc_euro_global.rds"))
cities <- as_tibble(cities)
head(cities)
## # A tibble: 6 × 12
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Berlin, NJ USA 39.8 -74.9 Americas BER 2018 North Am…
## 2 Columbus, OH USA 40.0 -82.9 Americas COL 2015 North Am…
## 3 Palm Beach USA 26.7 -80.0 Americas PAL 2018 North Am…
## 4 Houston, TX USA 29.8 -95.4 Americas HOU 2018 North Am…
## 5 Los Angeles USA 34.1 -118. Americas LOS 2018 North Am…
## 6 Manaus, AM Brazil -3.09 -60.0 Americas MAU 2017 South Am…
## # ℹ 4 more variables: Subregion <chr>, order <int>, order2 <int>,
## # orderold <int>
We can sort by distance
# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)
# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))
# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]
# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
} else if (i > j) {
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Print the merged matrix
print(merged_matrix)
## CRO ALD ITP TIR SER ALV CES ITR ROM SLO
## CRO NA "0.05" "0.05" "0.02" "0.17" "0.03" "0.18" "0.05" "0.05" "0.06"
## ALD "180" NA "0.08" "0.03" "0.22" "0.02" "0.20" "0.08" "0.10" "0.08"
## ITP "201" "224" NA "0.06" "0.17" "0.06" "0.16" "0.02" "0.02" "0.04"
## TIR "196" "27" "251" NA "0.22" "0.00" "0.21" "0.06" "0.07" "0.06"
## SER "322" "442" "520" "439" NA "0.18" "0.28" "0.16" "0.21" "0.15"
## ALV "260" "93" "235" "99" "534" NA "0.18" "0.06" "0.07" "0.06"
## CES "514" "673" "506" "696" "612" "724" NA "0.17" "0.19" "0.14"
## ITR "477" "585" "371" "612" "697" "605" "252" NA "0.01" "0.04"
## ROM "477" "585" "371" "612" "697" "605" "252" "0" NA "0.04"
## SLO "509" "689" "585" "705" "476" "761" "227" "455" "455" NA
## ITB "591" "752" "583" "775" "672" "802" "79" "302" "302" "245"
## BUL "428" "414" "604" "393" "316" "483" "881" "899" "899" "782"
## DES "691" "859" "702" "881" "725" "915" "198" "427" "427" "255"
## TRE "684" "858" "717" "877" "683" "920" "230" "476" "476" "206"
## BRE "719" "887" "728" "908" "752" "942" "223" "445" "445" "280"
## SIC "537" "480" "340" "502" "858" "420" "715" "483" "483" "865"
## ROS "683" "771" "884" "761" "367" "859" "917" "1047" "1047" "726"
## IMP "840" "984" "787" "1009" "949" "1020" "341" "428" "428" "507"
## GRA "711" "534" "705" "515" "884" "476" "1200" "1070" "1070" "1220"
## TUA "851" "694" "901" "669" "932" "667" "1363" "1271" "1271" "1340"
## FRS "1038" "1192" "1004" "1216" "1100" "1234" "525" "654" "654" "629"
## MAL "817" "746" "621" "764" "1139" "674" "943" "694" "694" "1118"
## STS "1053" "1230" "1098" "1248" "989" "1297" "604" "835" "835" "546"
## GRC "934" "754" "884" "742" "1141" "679" "1389" "1224" "1224" "1440"
## SEV "1250" "1196" "1410" "1170" "1082" "1235" "1689" "1723" "1723" "1551"
## ALU "1321" "1268" "1482" "1242" "1149" "1307" "1757" "1794" "1794" "1617"
## BAR "1333" "1446" "1227" "1473" "1489" "1458" "878" "862" "862" "1059"
## SPM "1360" "1450" "1227" "1478" "1555" "1448" "956" "883" "883" "1160"
## KER "1490" "1443" "1655" "1417" "1301" "1484" "1913" "1960" "1960" "1762"
## KRA "1687" "1633" "1848" "1607" "1506" "1670" "2117" "2160" "2160" "1968"
## SPC "1601" "1698" "1474" "1725" "1781" "1698" "1173" "1124" "1124" "1363"
## SOC "1748" "1678" "1898" "1651" "1591" "1706" "2197" "2224" "2224" "2060"
## TIK "1778" "1733" "1945" "1707" "1580" "1774" "2191" "2247" "2247" "2033"
## GES "1845" "1770" "1991" "1743" "1695" "1795" "2300" "2321" "2321" "2166"
## RAR "1853" "1798" "2014" "1772" "1670" "1833" "2281" "2326" "2326" "2130"
## TUH "1919" "1828" "2051" "1801" "1797" "1844" "2393" "2395" "2395" "2272"
## ARM "2235" "2144" "2367" "2117" "2107" "2159" "2706" "2711" "2711" "2581"
## SPB "2158" "2263" "2040" "2290" "2313" "2266" "1702" "1683" "1683" "1869"
## SPS "2144" "2226" "2002" "2253" "2341" "2215" "1737" "1669" "1669" "1930"
## POP "2197" "2320" "2103" "2347" "2316" "2335" "1711" "1734" "1734" "1852"
## ITB BUL DES TRE BRE SIC ROS IMP GRA TUA
## CRO "0.09" "0.09" "0.13" "0.05" "0.15" "0.08" "0.10" "0.08" "0.06" "0.07"
## ALD "0.12" "0.11" "0.16" "0.08" "0.17" "0.11" "0.12" "0.11" "0.08" "0.10"
## ITP "0.07" "0.06" "0.11" "0.03" "0.13" "0.04" "0.08" "0.05" "0.05" "0.06"
## TIR "0.11" "0.10" "0.15" "0.06" "0.17" "0.09" "0.12" "0.10" "0.06" "0.08"
## SER "0.22" "0.20" "0.23" "0.15" "0.25" "0.19" "0.20" "0.23" "0.19" "0.20"
## ALV "0.10" "0.09" "0.14" "0.06" "0.16" "0.09" "0.10" "0.08" "0.06" "0.08"
## CES "0.17" "0.19" "0.14" "0.12" "0.16" "0.16" "0.17" "0.18" "0.18" "0.19"
## ITR "0.07" "0.05" "0.13" "0.04" "0.14" "0.05" "0.08" "0.05" "0.05" "0.04"
## ROM "0.08" "0.07" "0.14" "0.03" "0.16" "0.06" "0.09" "0.06" "0.05" "0.05"
## SLO "0.06" "0.07" "0.08" "0.02" "0.10" "0.05" "0.06" "0.05" "0.05" "0.06"
## ITB NA "0.10" "0.11" "0.04" "0.12" "0.08" "0.10" "0.08" "0.10" "0.10"
## BUL "950" NA "0.14" "0.06" "0.15" "0.08" "0.10" "0.09" "0.09" "0.08"
## DES "126" "1021" NA "0.07" "0.09" "0.09" "0.11" "0.12" "0.14" "0.14"
## TRE "176" "986" "80" NA "0.09" "0.04" "0.05" "0.04" "0.06" "0.06"
## BRE "148" "1048" "28" "92" NA "0.12" "0.14" "0.13" "0.15" "0.16"
## SIC "779" "894" "905" "945" "925" NA "0.09" "0.07" "0.08" "0.08"
## ROS "961" "445" "977" "915" "1000" "1219" NA "0.09" "0.10" "0.10"
## IMP "277" "1221" "268" "346" "254" "891" "1232" NA "0.08" "0.08"
## GRA "1279" "656" "1390" "1391" "1417" "736" "1100" "1492" NA "0.07"
## TUA "1441" "641" "1542" "1529" "1569" "994" "1057" "1678" "277" NA
## FRS "448" "1393" "375" "428" "349" "1119" "1341" "228" "1709" "1885"
## MAL "996" "1157" "1119" "1170" "1135" "280" "1498" "1049" "873" "1148"
## STS "536" "1304" "410" "381" "390" "1314" "1125" "528" "1764" "1886"
## GRC "1467" "925" "1585" "1596" "1611" "812" "1370" "1653" "270" "445"
## SEV "1753" "824" "1804" "1756" "1830" "1652" "894" "2029" "1086" "843"
## ALU "1821" "895" "1870" "1821" "1896" "1724" "950" "2097" "1151" "904"
## BAR "824" "1742" "815" "887" "796" "1195" "1783" "552" "1900" "2123"
## SPM "916" "1782" "933" "1011" "919" "1136" "1873" "665" "1864" "2103"
## KER "1973" "1062" "2017" "1964" "2042" "1901" "1073" "2250" "1327" "1075"
## KRA "2178" "1261" "2222" "2170" "2248" "2084" "1276" "2455" "1478" "1214"
## SPC "1125" "2020" "1122" "1196" "1104" "1387" "2086" "857" "2116" "2354"
## SOC "2262" "1327" "2313" "2264" "2339" "2113" "1388" "2537" "1470" "1198"
## TIK "2250" "1350" "2288" "2233" "2313" "2190" "1328" "2527" "1600" "1339"
## GES "2365" "1426" "2418" "2370" "2444" "2197" "1496" "2640" "1540" "1265"
## RAR "2342" "1427" "2385" "2331" "2410" "2246" "1432" "2619" "1629" "1361"
## TUH "2461" "1512" "2522" "2478" "2549" "2233" "1629" "2734" "1544" "1267"
## ARM "2773" "1826" "2832" "2787" "2859" "2545" "1923" "3047" "1848" "1571"
## SPB "1644" "2569" "1617" "1682" "1595" "1957" "2594" "1367" "2687" "2925"
## SPS "1690" "2568" "1689" "1761" "1670" "1865" "2651" "1424" "2601" "2855"
## POP "1645" "2591" "1597" "1653" "1572" "2065" "2566" "1370" "2777" "3001"
## FRS MAL STS GRC SEV ALU BAR SPM KER KRA
## CRO "0.06" "0.06" "0.06" "0.05" "0.14" "0.09" "0.09" "0.09" "0.10" "0.09"
## ALD "0.09" "0.08" "0.10" "0.07" "0.16" "0.12" "0.12" "0.11" "0.12" "0.11"
## ITP "0.04" "0.02" "0.04" "0.06" "0.12" "0.08" "0.08" "0.06" "0.08" "0.07"
## TIR "0.07" "0.07" "0.08" "0.06" "0.15" "0.10" "0.10" "0.11" "0.11" "0.10"
## SER "0.16" "0.16" "0.18" "0.19" "0.25" "0.20" "0.22" "0.21" "0.20" "0.20"
## ALV "0.07" "0.06" "0.07" "0.05" "0.14" "0.10" "0.10" "0.10" "0.10" "0.09"
## CES "0.15" "0.15" "0.15" "0.19" "0.23" "0.19" "0.21" "0.15" "0.18" "0.18"
## ITR "0.04" "0.02" "0.04" "0.06" "0.12" "0.07" "0.07" "0.07" "0.08" "0.07"
## ROM "0.04" "0.02" "0.04" "0.06" "0.13" "0.08" "0.08" "0.08" "0.09" "0.08"
## SLO "0.04" "0.04" "0.04" "0.06" "0.11" "0.07" "0.08" "0.05" "0.07" "0.06"
## ITB "0.06" "0.06" "0.07" "0.10" "0.16" "0.10" "0.13" "0.09" "0.11" "0.11"
## BUL "0.07" "0.06" "0.08" "0.09" "0.15" "0.10" "0.11" "0.10" "0.11" "0.10"
## DES "0.11" "0.11" "0.11" "0.14" "0.18" "0.15" "0.17" "0.10" "0.14" "0.14"
## TRE "0.03" "0.03" "0.03" "0.06" "0.12" "0.07" "0.08" "0.04" "0.07" "0.07"
## BRE "0.12" "0.12" "0.12" "0.15" "0.20" "0.16" "0.18" "0.12" "0.15" "0.15"
## SIC "0.06" "0.05" "0.06" "0.08" "0.14" "0.10" "0.10" "0.07" "0.10" "0.09"
## ROS "0.07" "0.07" "0.08" "0.10" "0.15" "0.11" "0.12" "0.09" "0.11" "0.11"
## IMP "0.05" "0.04" "0.05" "0.09" "0.15" "0.09" "0.11" "0.08" "0.09" "0.09"
## GRA "0.07" "0.05" "0.07" "0.02" "0.13" "0.08" "0.09" "0.09" "0.10" "0.08"
## TUA "0.07" "0.05" "0.07" "0.08" "0.14" "0.09" "0.06" "0.09" "0.10" "0.09"
## FRS NA "0.04" "0.03" "0.07" "0.12" "0.08" "0.09" "0.06" "0.08" "0.08"
## MAL "1270" NA "0.04" "0.06" "0.12" "0.07" "0.08" "0.06" "0.08" "0.07"
## STS "412" "1523" NA "0.07" "0.13" "0.08" "0.09" "0.07" "0.08" "0.08"
## GRC "1877" "862" "1975" NA "0.13" "0.09" "0.10" "0.10" "0.10" "0.09"
## SEV "2180" "1878" "2012" "1288" NA "0.09" "0.16" "0.15" "0.07" "0.03"
## ALU "2246" "1948" "2072" "1348" "72" NA "0.11" "0.10" "0.07" "0.05"
## BAR "512" "1233" "916" "2007" "2561" "2631" NA "0.11" "0.12" "0.11"
## SPM "684" "1128" "1096" "1944" "2606" "2677" "211" NA "0.10" "0.09"
## KER "2391" "2127" "2197" "1517" "250" "179" "2789" "2841" NA "0.03"
## KRA "2596" "2301" "2401" "1648" "438" "366" "2992" "3042" "206" NA
## SPC "818" "1368" "1214" "2193" "2843" "2913" "310" "252" "3075" "3277"
## SOC "2689" "2316" "2512" "1618" "509" "443" "3068" "3107" "325" "161"
## TIK "2660" "2414" "2447" "1776" "539" "467" "3069" "3126" "290" "132"
## GES "2794" "2395" "2620" "1675" "614" "550" "3168" "3204" "434" "257"
## RAR "2758" "2460" "2555" "1789" "604" "532" "3157" "3208" "369" "166"
## TUH "2897" "2412" "2745" "1650" "734" "679" "3251" "3275" "597" "443"
## ARM "3207" "2719" "3045" "1937" "1034" "974" "3566" "3591" "862" "674"
## SPB "1264" "1923" "1601" "2760" "3388" "3457" "827" "823" "3614" "3818"
## SPS "1371" "1785" "1750" "2643" "3393" "3464" "874" "787" "3628" "3829"
## POP "1225" "2067" "1509" "2876" "3397" "3464" "878" "939" "3614" "3819"
## SPC SOC TIK GES RAR TUH ARM SPB SPS POP
## CRO "0.10" "0.11" "0.13" "0.12" "0.12" "0.07" "0.08" "0.09" "0.11" "0.06"
## ALD "0.13" "0.14" "0.16" "0.14" "0.15" "0.09" "0.11" "0.12" "0.14" "0.09"
## ITP "0.08" "0.10" "0.12" "0.11" "0.11" "0.04" "0.07" "0.07" "0.09" "0.04"
## TIR "0.11" "0.13" "0.15" "0.14" "0.14" "0.07" "0.09" "0.10" "0.13" "0.07"
## SER "0.22" "0.23" "0.25" "0.23" "0.24" "0.17" "0.19" "0.20" "0.24" "0.17"
## ALV "0.11" "0.12" "0.14" "0.12" "0.13" "0.07" "0.09" "0.10" "0.11" "0.07"
## CES "0.15" "0.21" "0.22" "0.21" "0.22" "0.16" "0.16" "0.13" "0.22" "0.13"
## ITR "0.09" "0.10" "0.12" "0.10" "0.10" "0.03" "0.07" "0.08" "0.07" "0.03"
## ROM "0.10" "0.11" "0.13" "0.11" "0.12" "0.04" "0.08" "0.08" "0.09" "0.04"
## SLO "0.06" "0.09" "0.11" "0.10" "0.10" "0.04" "0.05" "0.05" "0.09" "0.04"
## ITB "0.09" "0.13" "0.16" "0.14" "0.14" "0.08" "0.09" "0.08" "0.13" "0.05"
## BUL "0.10" "0.13" "0.15" "0.13" "0.14" "0.07" "0.10" "0.10" "0.12" "0.07"
## DES "0.10" "0.17" "0.18" "0.17" "0.18" "0.12" "0.11" "0.08" "0.17" "0.08"
## TRE "0.05" "0.10" "0.11" "0.10" "0.11" "0.04" "0.05" "0.04" "0.08" "0.03"
## BRE "0.12" "0.18" "0.19" "0.18" "0.19" "0.13" "0.13" "0.10" "0.18" "0.11"
## SIC "0.09" "0.12" "0.14" "0.12" "0.13" "0.06" "0.08" "0.08" "0.11" "0.05"
## ROS "0.10" "0.13" "0.15" "0.14" "0.14" "0.08" "0.09" "0.09" "0.13" "0.07"
## IMP "0.08" "0.12" "0.15" "0.13" "0.13" "0.06" "0.08" "0.07" "0.10" "0.05"
## GRA "0.10" "0.11" "0.13" "0.11" "0.12" "0.06" "0.08" "0.09" "0.10" "0.07"
## TUA "0.11" "0.12" "0.14" "0.12" "0.12" "0.05" "0.09" "0.10" "0.03" "0.06"
## FRS "0.07" "0.10" "0.12" "0.11" "0.11" "0.05" "0.06" "0.06" "0.10" "0.04"
## MAL "0.07" "0.09" "0.11" "0.10" "0.11" "0.04" "0.06" "0.07" "0.07" "0.03"
## STS "0.07" "0.11" "0.12" "0.11" "0.11" "0.05" "0.07" "0.06" "0.10" "0.04"
## GRC "0.11" "0.12" "0.13" "0.12" "0.12" "0.07" "0.08" "0.09" "0.11" "0.07"
## SEV "0.15" "0.04" "0.08" "0.05" "0.05" "0.12" "0.07" "0.14" "0.17" "0.12"
## ALU "0.11" "0.07" "0.09" "0.09" "0.08" "0.08" "0.06" "0.10" "0.13" "0.08"
## BAR "0.13" "0.14" "0.15" "0.14" "0.14" "0.04" "0.11" "0.12" "0.11" "0.09"
## SPM "0.03" "0.12" "0.14" "0.13" "0.13" "0.07" "0.07" "0.01" "0.08" "0.05"
## KER "0.10" "0.05" "0.07" "0.05" "0.05" "0.09" "0.04" "0.09" "0.13" "0.07"
## KRA "0.10" "0.01" "0.04" "0.02" "0.01" "0.08" "0.02" "0.09" "0.13" "0.07"
## SPC NA "0.13" "0.15" "0.13" "0.14" "0.08" "0.07" "0.01" "0.14" "0.05"
## SOC "3346" NA "0.06" "0.01" "0.02" "0.10" "0.05" "0.12" "0.15" "0.10"
## TIK "3358" "253" NA "0.05" "0.05" "0.12" "0.07" "0.14" "0.17" "0.12"
## GES "3445" "109" "315" NA "0.03" "0.11" "0.06" "0.12" "0.16" "0.10"
## RAR "3442" "187" "127" "210" NA "0.11" "0.05" "0.13" "0.16" "0.11"
## TUH "3519" "283" "508" "194" "398" NA "0.07" "0.08" "0.09" "0.05"
## ARM "3835" "538" "687" "429" "560" "316" NA "0.06" "0.12" "0.05"
## SPB "571" "3895" "3893" "3995" "3983" "4076" "4392" NA "0.13" "0.05"
## SPS "567" "3892" "3913" "3988" "3995" "4054" "4370" "332" NA "0.09"
## POP "702" "3906" "3885" "4010" "3981" "4104" "4417" "283" "615" NA
Make a table and save it as a word document
# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)
# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]
merged_df1 <- as.data.frame(merged_df)
write.csv(merged_df1, "/gpfs/gibbs/pi/caccone/mkc54/albo/scripts/RMarkdowns/output/europe/fst/merged_df_MAF1.csv")
# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df))
ft
Population | CRO | ALD | ITP | TIR | SER | ALV | CES | ITR | ROM | SLO | ITB | BUL | DES | TRE | BRE | SIC | ROS | IMP | GRA | TUA | FRS | MAL | STS | GRC | SEV | ALU | BAR | SPM | KER | KRA | SPC | SOC | TIK | GES | RAR | TUH | ARM | SPB | SPS | POP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRO | 0.05 | 0.05 | 0.02 | 0.17 | 0.03 | 0.18 | 0.05 | 0.05 | 0.06 | 0.09 | 0.09 | 0.13 | 0.05 | 0.15 | 0.08 | 0.10 | 0.08 | 0.06 | 0.07 | 0.06 | 0.06 | 0.06 | 0.05 | 0.14 | 0.09 | 0.09 | 0.09 | 0.10 | 0.09 | 0.10 | 0.11 | 0.13 | 0.12 | 0.12 | 0.07 | 0.08 | 0.09 | 0.11 | 0.06 | |
ALD | 180 | 0.08 | 0.03 | 0.22 | 0.02 | 0.20 | 0.08 | 0.10 | 0.08 | 0.12 | 0.11 | 0.16 | 0.08 | 0.17 | 0.11 | 0.12 | 0.11 | 0.08 | 0.10 | 0.09 | 0.08 | 0.10 | 0.07 | 0.16 | 0.12 | 0.12 | 0.11 | 0.12 | 0.11 | 0.13 | 0.14 | 0.16 | 0.14 | 0.15 | 0.09 | 0.11 | 0.12 | 0.14 | 0.09 | |
ITP | 201 | 224 | 0.06 | 0.17 | 0.06 | 0.16 | 0.02 | 0.02 | 0.04 | 0.07 | 0.06 | 0.11 | 0.03 | 0.13 | 0.04 | 0.08 | 0.05 | 0.05 | 0.06 | 0.04 | 0.02 | 0.04 | 0.06 | 0.12 | 0.08 | 0.08 | 0.06 | 0.08 | 0.07 | 0.08 | 0.10 | 0.12 | 0.11 | 0.11 | 0.04 | 0.07 | 0.07 | 0.09 | 0.04 | |
TIR | 196 | 27 | 251 | 0.22 | 0.00 | 0.21 | 0.06 | 0.07 | 0.06 | 0.11 | 0.10 | 0.15 | 0.06 | 0.17 | 0.09 | 0.12 | 0.10 | 0.06 | 0.08 | 0.07 | 0.07 | 0.08 | 0.06 | 0.15 | 0.10 | 0.10 | 0.11 | 0.11 | 0.10 | 0.11 | 0.13 | 0.15 | 0.14 | 0.14 | 0.07 | 0.09 | 0.10 | 0.13 | 0.07 | |
SER | 322 | 442 | 520 | 439 | 0.18 | 0.28 | 0.16 | 0.21 | 0.15 | 0.22 | 0.20 | 0.23 | 0.15 | 0.25 | 0.19 | 0.20 | 0.23 | 0.19 | 0.20 | 0.16 | 0.16 | 0.18 | 0.19 | 0.25 | 0.20 | 0.22 | 0.21 | 0.20 | 0.20 | 0.22 | 0.23 | 0.25 | 0.23 | 0.24 | 0.17 | 0.19 | 0.20 | 0.24 | 0.17 | |
ALV | 260 | 93 | 235 | 99 | 534 | 0.18 | 0.06 | 0.07 | 0.06 | 0.10 | 0.09 | 0.14 | 0.06 | 0.16 | 0.09 | 0.10 | 0.08 | 0.06 | 0.08 | 0.07 | 0.06 | 0.07 | 0.05 | 0.14 | 0.10 | 0.10 | 0.10 | 0.10 | 0.09 | 0.11 | 0.12 | 0.14 | 0.12 | 0.13 | 0.07 | 0.09 | 0.10 | 0.11 | 0.07 | |
CES | 514 | 673 | 506 | 696 | 612 | 724 | 0.17 | 0.19 | 0.14 | 0.17 | 0.19 | 0.14 | 0.12 | 0.16 | 0.16 | 0.17 | 0.18 | 0.18 | 0.19 | 0.15 | 0.15 | 0.15 | 0.19 | 0.23 | 0.19 | 0.21 | 0.15 | 0.18 | 0.18 | 0.15 | 0.21 | 0.22 | 0.21 | 0.22 | 0.16 | 0.16 | 0.13 | 0.22 | 0.13 | |
ITR | 477 | 585 | 371 | 612 | 697 | 605 | 252 | 0.01 | 0.04 | 0.07 | 0.05 | 0.13 | 0.04 | 0.14 | 0.05 | 0.08 | 0.05 | 0.05 | 0.04 | 0.04 | 0.02 | 0.04 | 0.06 | 0.12 | 0.07 | 0.07 | 0.07 | 0.08 | 0.07 | 0.09 | 0.10 | 0.12 | 0.10 | 0.10 | 0.03 | 0.07 | 0.08 | 0.07 | 0.03 | |
ROM | 477 | 585 | 371 | 612 | 697 | 605 | 252 | 0 | 0.04 | 0.08 | 0.07 | 0.14 | 0.03 | 0.16 | 0.06 | 0.09 | 0.06 | 0.05 | 0.05 | 0.04 | 0.02 | 0.04 | 0.06 | 0.13 | 0.08 | 0.08 | 0.08 | 0.09 | 0.08 | 0.10 | 0.11 | 0.13 | 0.11 | 0.12 | 0.04 | 0.08 | 0.08 | 0.09 | 0.04 | |
SLO | 509 | 689 | 585 | 705 | 476 | 761 | 227 | 455 | 455 | 0.06 | 0.07 | 0.08 | 0.02 | 0.10 | 0.05 | 0.06 | 0.05 | 0.05 | 0.06 | 0.04 | 0.04 | 0.04 | 0.06 | 0.11 | 0.07 | 0.08 | 0.05 | 0.07 | 0.06 | 0.06 | 0.09 | 0.11 | 0.10 | 0.10 | 0.04 | 0.05 | 0.05 | 0.09 | 0.04 | |
ITB | 591 | 752 | 583 | 775 | 672 | 802 | 79 | 302 | 302 | 245 | 0.10 | 0.11 | 0.04 | 0.12 | 0.08 | 0.10 | 0.08 | 0.10 | 0.10 | 0.06 | 0.06 | 0.07 | 0.10 | 0.16 | 0.10 | 0.13 | 0.09 | 0.11 | 0.11 | 0.09 | 0.13 | 0.16 | 0.14 | 0.14 | 0.08 | 0.09 | 0.08 | 0.13 | 0.05 | |
BUL | 428 | 414 | 604 | 393 | 316 | 483 | 881 | 899 | 899 | 782 | 950 | 0.14 | 0.06 | 0.15 | 0.08 | 0.10 | 0.09 | 0.09 | 0.08 | 0.07 | 0.06 | 0.08 | 0.09 | 0.15 | 0.10 | 0.11 | 0.10 | 0.11 | 0.10 | 0.10 | 0.13 | 0.15 | 0.13 | 0.14 | 0.07 | 0.10 | 0.10 | 0.12 | 0.07 | |
DES | 691 | 859 | 702 | 881 | 725 | 915 | 198 | 427 | 427 | 255 | 126 | 1021 | 0.07 | 0.09 | 0.09 | 0.11 | 0.12 | 0.14 | 0.14 | 0.11 | 0.11 | 0.11 | 0.14 | 0.18 | 0.15 | 0.17 | 0.10 | 0.14 | 0.14 | 0.10 | 0.17 | 0.18 | 0.17 | 0.18 | 0.12 | 0.11 | 0.08 | 0.17 | 0.08 | |
TRE | 684 | 858 | 717 | 877 | 683 | 920 | 230 | 476 | 476 | 206 | 176 | 986 | 80 | 0.09 | 0.04 | 0.05 | 0.04 | 0.06 | 0.06 | 0.03 | 0.03 | 0.03 | 0.06 | 0.12 | 0.07 | 0.08 | 0.04 | 0.07 | 0.07 | 0.05 | 0.10 | 0.11 | 0.10 | 0.11 | 0.04 | 0.05 | 0.04 | 0.08 | 0.03 | |
BRE | 719 | 887 | 728 | 908 | 752 | 942 | 223 | 445 | 445 | 280 | 148 | 1048 | 28 | 92 | 0.12 | 0.14 | 0.13 | 0.15 | 0.16 | 0.12 | 0.12 | 0.12 | 0.15 | 0.20 | 0.16 | 0.18 | 0.12 | 0.15 | 0.15 | 0.12 | 0.18 | 0.19 | 0.18 | 0.19 | 0.13 | 0.13 | 0.10 | 0.18 | 0.11 | |
SIC | 537 | 480 | 340 | 502 | 858 | 420 | 715 | 483 | 483 | 865 | 779 | 894 | 905 | 945 | 925 | 0.09 | 0.07 | 0.08 | 0.08 | 0.06 | 0.05 | 0.06 | 0.08 | 0.14 | 0.10 | 0.10 | 0.07 | 0.10 | 0.09 | 0.09 | 0.12 | 0.14 | 0.12 | 0.13 | 0.06 | 0.08 | 0.08 | 0.11 | 0.05 | |
ROS | 683 | 771 | 884 | 761 | 367 | 859 | 917 | 1047 | 1047 | 726 | 961 | 445 | 977 | 915 | 1000 | 1219 | 0.09 | 0.10 | 0.10 | 0.07 | 0.07 | 0.08 | 0.10 | 0.15 | 0.11 | 0.12 | 0.09 | 0.11 | 0.11 | 0.10 | 0.13 | 0.15 | 0.14 | 0.14 | 0.08 | 0.09 | 0.09 | 0.13 | 0.07 | |
IMP | 840 | 984 | 787 | 1009 | 949 | 1020 | 341 | 428 | 428 | 507 | 277 | 1221 | 268 | 346 | 254 | 891 | 1232 | 0.08 | 0.08 | 0.05 | 0.04 | 0.05 | 0.09 | 0.15 | 0.09 | 0.11 | 0.08 | 0.09 | 0.09 | 0.08 | 0.12 | 0.15 | 0.13 | 0.13 | 0.06 | 0.08 | 0.07 | 0.10 | 0.05 | |
GRA | 711 | 534 | 705 | 515 | 884 | 476 | 1200 | 1070 | 1070 | 1220 | 1279 | 656 | 1390 | 1391 | 1417 | 736 | 1100 | 1492 | 0.07 | 0.07 | 0.05 | 0.07 | 0.02 | 0.13 | 0.08 | 0.09 | 0.09 | 0.10 | 0.08 | 0.10 | 0.11 | 0.13 | 0.11 | 0.12 | 0.06 | 0.08 | 0.09 | 0.10 | 0.07 | |
TUA | 851 | 694 | 901 | 669 | 932 | 667 | 1363 | 1271 | 1271 | 1340 | 1441 | 641 | 1542 | 1529 | 1569 | 994 | 1057 | 1678 | 277 | 0.07 | 0.05 | 0.07 | 0.08 | 0.14 | 0.09 | 0.06 | 0.09 | 0.10 | 0.09 | 0.11 | 0.12 | 0.14 | 0.12 | 0.12 | 0.05 | 0.09 | 0.10 | 0.03 | 0.06 | |
FRS | 1038 | 1192 | 1004 | 1216 | 1100 | 1234 | 525 | 654 | 654 | 629 | 448 | 1393 | 375 | 428 | 349 | 1119 | 1341 | 228 | 1709 | 1885 | 0.04 | 0.03 | 0.07 | 0.12 | 0.08 | 0.09 | 0.06 | 0.08 | 0.08 | 0.07 | 0.10 | 0.12 | 0.11 | 0.11 | 0.05 | 0.06 | 0.06 | 0.10 | 0.04 | |
MAL | 817 | 746 | 621 | 764 | 1139 | 674 | 943 | 694 | 694 | 1118 | 996 | 1157 | 1119 | 1170 | 1135 | 280 | 1498 | 1049 | 873 | 1148 | 1270 | 0.04 | 0.06 | 0.12 | 0.07 | 0.08 | 0.06 | 0.08 | 0.07 | 0.07 | 0.09 | 0.11 | 0.10 | 0.11 | 0.04 | 0.06 | 0.07 | 0.07 | 0.03 | |
STS | 1053 | 1230 | 1098 | 1248 | 989 | 1297 | 604 | 835 | 835 | 546 | 536 | 1304 | 410 | 381 | 390 | 1314 | 1125 | 528 | 1764 | 1886 | 412 | 1523 | 0.07 | 0.13 | 0.08 | 0.09 | 0.07 | 0.08 | 0.08 | 0.07 | 0.11 | 0.12 | 0.11 | 0.11 | 0.05 | 0.07 | 0.06 | 0.10 | 0.04 | |
GRC | 934 | 754 | 884 | 742 | 1141 | 679 | 1389 | 1224 | 1224 | 1440 | 1467 | 925 | 1585 | 1596 | 1611 | 812 | 1370 | 1653 | 270 | 445 | 1877 | 862 | 1975 | 0.13 | 0.09 | 0.10 | 0.10 | 0.10 | 0.09 | 0.11 | 0.12 | 0.13 | 0.12 | 0.12 | 0.07 | 0.08 | 0.09 | 0.11 | 0.07 | |
SEV | 1250 | 1196 | 1410 | 1170 | 1082 | 1235 | 1689 | 1723 | 1723 | 1551 | 1753 | 824 | 1804 | 1756 | 1830 | 1652 | 894 | 2029 | 1086 | 843 | 2180 | 1878 | 2012 | 1288 | 0.09 | 0.16 | 0.15 | 0.07 | 0.03 | 0.15 | 0.04 | 0.08 | 0.05 | 0.05 | 0.12 | 0.07 | 0.14 | 0.17 | 0.12 | |
ALU | 1321 | 1268 | 1482 | 1242 | 1149 | 1307 | 1757 | 1794 | 1794 | 1617 | 1821 | 895 | 1870 | 1821 | 1896 | 1724 | 950 | 2097 | 1151 | 904 | 2246 | 1948 | 2072 | 1348 | 72 | 0.11 | 0.10 | 0.07 | 0.05 | 0.11 | 0.07 | 0.09 | 0.09 | 0.08 | 0.08 | 0.06 | 0.10 | 0.13 | 0.08 | |
BAR | 1333 | 1446 | 1227 | 1473 | 1489 | 1458 | 878 | 862 | 862 | 1059 | 824 | 1742 | 815 | 887 | 796 | 1195 | 1783 | 552 | 1900 | 2123 | 512 | 1233 | 916 | 2007 | 2561 | 2631 | 0.11 | 0.12 | 0.11 | 0.13 | 0.14 | 0.15 | 0.14 | 0.14 | 0.04 | 0.11 | 0.12 | 0.11 | 0.09 | |
SPM | 1360 | 1450 | 1227 | 1478 | 1555 | 1448 | 956 | 883 | 883 | 1160 | 916 | 1782 | 933 | 1011 | 919 | 1136 | 1873 | 665 | 1864 | 2103 | 684 | 1128 | 1096 | 1944 | 2606 | 2677 | 211 | 0.10 | 0.09 | 0.03 | 0.12 | 0.14 | 0.13 | 0.13 | 0.07 | 0.07 | 0.01 | 0.08 | 0.05 | |
KER | 1490 | 1443 | 1655 | 1417 | 1301 | 1484 | 1913 | 1960 | 1960 | 1762 | 1973 | 1062 | 2017 | 1964 | 2042 | 1901 | 1073 | 2250 | 1327 | 1075 | 2391 | 2127 | 2197 | 1517 | 250 | 179 | 2789 | 2841 | 0.03 | 0.10 | 0.05 | 0.07 | 0.05 | 0.05 | 0.09 | 0.04 | 0.09 | 0.13 | 0.07 | |
KRA | 1687 | 1633 | 1848 | 1607 | 1506 | 1670 | 2117 | 2160 | 2160 | 1968 | 2178 | 1261 | 2222 | 2170 | 2248 | 2084 | 1276 | 2455 | 1478 | 1214 | 2596 | 2301 | 2401 | 1648 | 438 | 366 | 2992 | 3042 | 206 | 0.10 | 0.01 | 0.04 | 0.02 | 0.01 | 0.08 | 0.02 | 0.09 | 0.13 | 0.07 | |
SPC | 1601 | 1698 | 1474 | 1725 | 1781 | 1698 | 1173 | 1124 | 1124 | 1363 | 1125 | 2020 | 1122 | 1196 | 1104 | 1387 | 2086 | 857 | 2116 | 2354 | 818 | 1368 | 1214 | 2193 | 2843 | 2913 | 310 | 252 | 3075 | 3277 | 0.13 | 0.15 | 0.13 | 0.14 | 0.08 | 0.07 | 0.01 | 0.14 | 0.05 | |
SOC | 1748 | 1678 | 1898 | 1651 | 1591 | 1706 | 2197 | 2224 | 2224 | 2060 | 2262 | 1327 | 2313 | 2264 | 2339 | 2113 | 1388 | 2537 | 1470 | 1198 | 2689 | 2316 | 2512 | 1618 | 509 | 443 | 3068 | 3107 | 325 | 161 | 3346 | 0.06 | 0.01 | 0.02 | 0.10 | 0.05 | 0.12 | 0.15 | 0.10 | |
TIK | 1778 | 1733 | 1945 | 1707 | 1580 | 1774 | 2191 | 2247 | 2247 | 2033 | 2250 | 1350 | 2288 | 2233 | 2313 | 2190 | 1328 | 2527 | 1600 | 1339 | 2660 | 2414 | 2447 | 1776 | 539 | 467 | 3069 | 3126 | 290 | 132 | 3358 | 253 | 0.05 | 0.05 | 0.12 | 0.07 | 0.14 | 0.17 | 0.12 | |
GES | 1845 | 1770 | 1991 | 1743 | 1695 | 1795 | 2300 | 2321 | 2321 | 2166 | 2365 | 1426 | 2418 | 2370 | 2444 | 2197 | 1496 | 2640 | 1540 | 1265 | 2794 | 2395 | 2620 | 1675 | 614 | 550 | 3168 | 3204 | 434 | 257 | 3445 | 109 | 315 | 0.03 | 0.11 | 0.06 | 0.12 | 0.16 | 0.10 | |
RAR | 1853 | 1798 | 2014 | 1772 | 1670 | 1833 | 2281 | 2326 | 2326 | 2130 | 2342 | 1427 | 2385 | 2331 | 2410 | 2246 | 1432 | 2619 | 1629 | 1361 | 2758 | 2460 | 2555 | 1789 | 604 | 532 | 3157 | 3208 | 369 | 166 | 3442 | 187 | 127 | 210 | 0.11 | 0.05 | 0.13 | 0.16 | 0.11 | |
TUH | 1919 | 1828 | 2051 | 1801 | 1797 | 1844 | 2393 | 2395 | 2395 | 2272 | 2461 | 1512 | 2522 | 2478 | 2549 | 2233 | 1629 | 2734 | 1544 | 1267 | 2897 | 2412 | 2745 | 1650 | 734 | 679 | 3251 | 3275 | 597 | 443 | 3519 | 283 | 508 | 194 | 398 | 0.07 | 0.08 | 0.09 | 0.05 | |
ARM | 2235 | 2144 | 2367 | 2117 | 2107 | 2159 | 2706 | 2711 | 2711 | 2581 | 2773 | 1826 | 2832 | 2787 | 2859 | 2545 | 1923 | 3047 | 1848 | 1571 | 3207 | 2719 | 3045 | 1937 | 1034 | 974 | 3566 | 3591 | 862 | 674 | 3835 | 538 | 687 | 429 | 560 | 316 | 0.06 | 0.12 | 0.05 | |
SPB | 2158 | 2263 | 2040 | 2290 | 2313 | 2266 | 1702 | 1683 | 1683 | 1869 | 1644 | 2569 | 1617 | 1682 | 1595 | 1957 | 2594 | 1367 | 2687 | 2925 | 1264 | 1923 | 1601 | 2760 | 3388 | 3457 | 827 | 823 | 3614 | 3818 | 571 | 3895 | 3893 | 3995 | 3983 | 4076 | 4392 | 0.13 | 0.05 | |
SPS | 2144 | 2226 | 2002 | 2253 | 2341 | 2215 | 1737 | 1669 | 1669 | 1930 | 1690 | 2568 | 1689 | 1761 | 1670 | 1865 | 2651 | 1424 | 2601 | 2855 | 1371 | 1785 | 1750 | 2643 | 3393 | 3464 | 874 | 787 | 3628 | 3829 | 567 | 3892 | 3913 | 3988 | 3995 | 4054 | 4370 | 332 | 0.09 | |
POP | 2197 | 2320 | 2103 | 2347 | 2316 | 2335 | 1711 | 1734 | 1734 | 1852 | 1645 | 2591 | 1597 | 1653 | 1572 | 2065 | 2566 | 1370 | 2777 | 3001 | 1225 | 2067 | 1509 | 2876 | 3397 | 3464 | 878 | 939 | 3614 | 3819 | 702 | 3906 | 3885 | 4010 | 3981 | 4104 | 4417 | 283 | 615 |
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/pops_4bfst.txt \
--make-bed \
--out output/fst/mantel_MAF1 \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel_MAF1.log
20968 variants loaded from .bim file. –keep-fam: 408 people remaining. Total genotyping rate in remaining samples is 0.971028. 20968 variants and 408 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/mantel_MAF1 \
--recodeA \
--out output/fst/mantel_MAF1 \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel_MAF1.log
20968 variants loaded from .bim file. 20968 variants and 408 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel_MAF1.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
Load it
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel_MAF1.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# # Replace the BEN a with BEN b (remember to never name samples with the same ID... I change it manually in the fam file.)
# fam_data <- fam_data %>%
# mutate(IndividualID = ifelse(FamilyID == "BEN" & IndividualID == "a", "b", IndividualID))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 SOC 1065 0 0 0 -9 Sochi Russia
## 2 SOC 1066 0 0 0 -9 Sochi Russia
## 3 SOC 1067 0 0 0 -9 Sochi Russia
## 4 SOC 1068 0 0 0 -9 Sochi Russia
## 5 SOC 1069 0 0 0 -9 Sochi Russia
## 6 SOC 1070 0 0 0 -9 Sochi Russia
## Latitude Longitude Continent Year Region Subregion order order2
## 1 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 2 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 3 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 4 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 5 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 6 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## orderold
## 1 38
## 2 38
## 3 38
## 4 38
## 5 38
## 6 38
Get the latitude and longitude
## [,1] [,2]
## [1,] 43.60042 39.74533
## [2,] 43.60042 39.74533
## [3,] 43.60042 39.74533
## [4,] 43.60042 39.74533
## [5,] 43.60042 39.74533
## [6,] 43.60042 39.74533
Add jitter
## x y
## [1,] 43.59856 39.74459
## [2,] 43.59977 39.74418
## [3,] 43.59923 39.74627
## [4,] 43.59845 39.74716
## [5,] 43.60109 39.74391
## [6,] 43.60151 39.74429
Add to object
Save
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: -0.01144286
##
## Based on 999 replicates
## Simulated p-value: 0.533
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.141786675 0.001207842 0.007960831
Simulated p-value is not statistically signficant. Alternative hypothesis: greater. The test was one-sided, checking if the observed correlation is greater than what would be expected by chance.
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "simIBD_europe_MAF1.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "Genetic_v_Geog_distance_europe_MAF_1.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"output", "europe", "fst", "IDB_PlotFromMASS_density_MAF1.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
png(here("output", "europe", "fst", "ibd2_MAF1.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
Subset by region
Add subset column
iberia <- c("Spain", "Portugal")
italy <- c("Italy")
central <- c("Albania", "Croatia", "Greece")
east <- c("Armenia", "Georgia", "Russia", "Ukraine")
other <- c("Bulgaria", "France", "Malta", "Romania", "Serbia", "Slovenia", "Turkey")
library(dplyr)
filtered_sampling_loc2 <- mutate(filtered_sampling_loc, subset_cat = factor(case_when(Country %in% iberia ~ "Iberian Peninsula", Country %in% italy ~ "Italy", Country %in% central ~ "Albania, Croatia, Greece", Country %in% east ~ "Eastern", Country %in% other ~ "Central", TRUE ~ NA_character_)))
Select subsets
countries_for_subsets <- filtered_sampling_loc2 %>%
group_by(subset_cat) %>%
filter(subset_cat !="Central") %>%
pull(subset_cat) %>%
unique()
countries_for_subsets
## [1] Iberian Peninsula Italy Albania, Croatia, Greece
## [4] Eastern
## Levels: Albania, Croatia, Greece Central Eastern Iberian Peninsula Italy
results <- list()
for (country in countries_with_3_pops) { # Extract abbreviations for the country abbreviations <- filtered_sampling_loc %>% filter(Country == country) %>% pull(Abbreviation)
# Subset the data subset_data <- data %>% filter(row_index %in% abbreviations & col_index %in% abbreviations)
# Perform linear regression lm_model <- lm(FST ~ Distance, data = subset_data) results[[country]] <- list( equation = sprintf(“y = %.5fx + %.3f”, coef(lm_model)[2], coef(lm_model)[1]), r2 = sprintf(“R^2 = %.2f”, summary(lm_model)$r.squared) ) }
results Do test for each country
results <- list()
for (subset_cat in countries_for_subsets) {
# Extract abbreviations for the country
abbreviations <- filtered_sampling_loc2 %>%
filter(subset_cat == subset_cat) %>%
pull(Abbreviation)
# Subset the data
subset_data <- data %>%
filter(row_index %in% abbreviations & col_index %in% abbreviations)
# Perform linear regression
lm_model <- lm(FST ~ Distance, data = subset_data)
results[[subset_cat]] <- list(
equation = sprintf("y = %.5fx + %.3f", coef(lm_model)[2], coef(lm_model)[1]),
r2 = sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
)
}
results
## $`Iberian Peninsula`
## $`Iberian Peninsula`$equation
## [1] "y = 0.00001x + 0.088"
##
## $`Iberian Peninsula`$r2
## [1] "R^2 = 0.03"
##
##
## $Italy
## $Italy$equation
## [1] "y = 0.00001x + 0.088"
##
## $Italy$r2
## [1] "R^2 = 0.03"
##
##
## $`Albania, Croatia, Greece`
## $`Albania, Croatia, Greece`$equation
## [1] "y = 0.00001x + 0.088"
##
## $`Albania, Croatia, Greece`$r2
## [1] "R^2 = 0.03"
##
##
## $Eastern
## $Eastern$equation
## [1] "y = 0.00001x + 0.088"
##
## $Eastern$r2
## [1] "R^2 = 0.03"
Merge the data
data_merged <- data %>%
left_join(filtered_sampling_loc2[, c("Pop_City", "subset_cat", "Abbreviation")], by = c("row_index" = "Abbreviation")) %>%
rename(Country1 = subset_cat) %>%
left_join(filtered_sampling_loc2[, c("Pop_City", "subset_cat", "Abbreviation")], by = c("col_index" = "Abbreviation")) %>%
dplyr::select(-Pop_City.x, -Pop_City.y) %>%
filter(Country1 == subset_cat) # Ensures the data is within the same country
# Filter to get the coutries with 3 or more sampling localities
to_include <- c("Italy", "Albania, Croatia, Greece", "Eastern", "Iberian Peninsula")
# Filter
data_filtered <- data_merged %>%
group_by(Country1) %>%
filter(n() >= 3 & Country1 %in% to_include) %>%
ungroup()
Calculate linear regression for each country
regression_results <- data_filtered %>%
group_by(Country1) %>%
do(model = lm(FST ~ Distance, data = .)) %>%
rowwise() %>%
mutate(equation = sprintf("y = %.3fx + %.3f", coef(model)[2], coef(model)[1]), r2 = sprintf("R^2 = %.2f", summary(model)$r.squared))
Plot it
ggplot(data_filtered, aes(x = Distance, y = FST)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ Country1, scales = "free", ncol = 2) +
geom_text(data = regression_results, aes(label = paste(equation, r2, sep = "\n"), x = Inf, y = Inf),
vjust = 2, hjust = 2, size = 3.5, inherit.aes = FALSE) +
labs(title = "FST vs Distance by Region",
x = "Distance",
y = "FST") +
scale_x_continuous(labels = scales::comma) +
theme_bw()
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("output", "europe", "fst", "r_01", "fst_by_distance_Europe_subsets_r01_LD2.pdf"),
width = 6,
height = 8,
units = "in"
)
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/subsets/pops_iberia.txt \
--make-bed \
--out output/fst/subsets/mantel_iberia \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_iberia.log
20968 variants loaded from .bim file. –keep-fam: 52 people remaining. Total genotyping rate in remaining samples is 0.974247. 20968 variants and 52 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/subsets/mantel_iberia \
--recodeA \
--out output/fst/subsets/mantel_iberia \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_iberia.log
20968 variants loaded from .bim file. 20968 variants and 52 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_iberia.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
saveRDS(albo2, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/iberia_MAF1.rds"
))
Load it
albo2 <- readRDS(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/iberia_MAF1.rds"
))
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_iberia.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 BAR 279 0 0 0 -9
## 2 BAR 280 0 0 0 -9
## 3 BAR 281 0 0 0 -9
## 4 BAR 282 0 0 0 -9
## 5 BAR 283 0 0 0 -9
## 6 BAR 284 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 BAR 279 0 0 0 -9 Barcelona Spain
## 2 BAR 280 0 0 0 -9 Barcelona Spain
## 3 BAR 281 0 0 0 -9 Barcelona Spain
## 4 BAR 282 0 0 0 -9 Barcelona Spain
## 5 BAR 283 0 0 0 -9 Barcelona Spain
## 6 BAR 284 0 0 0 -9 Barcelona Spain
## Latitude Longitude Continent Year Region Subregion order order2
## 1 41.3851 2.1734 Europe 2018 Southern Europe West Europe 16 8
## 2 41.3851 2.1734 Europe 2018 Southern Europe West Europe 16 8
## 3 41.3851 2.1734 Europe 2018 Southern Europe West Europe 16 8
## 4 41.3851 2.1734 Europe 2018 Southern Europe West Europe 16 8
## 5 41.3851 2.1734 Europe 2018 Southern Europe West Europe 16 8
## 6 41.3851 2.1734 Europe 2018 Southern Europe West Europe 16 8
## orderold
## 1 8
## 2 8
## 3 8
## 4 8
## 5 8
## 6 8
Get the latitude and longitude
## [,1] [,2]
## [1,] 41.3851 2.1734
## [2,] 41.3851 2.1734
## [3,] 41.3851 2.1734
## [4,] 41.3851 2.1734
## [5,] 41.3851 2.1734
## [6,] 41.3851 2.1734
Add jitter
## x y
## [1,] 41.38322 2.154412
## [2,] 41.37141 2.194045
## [3,] 41.37607 2.173582
## [4,] 41.39055 2.159370
## [5,] 41.39245 2.175299
## [6,] 41.39631 2.163812
Add to object
Save
saveRDS(
albo2,
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/iberia_albo2.rds"
)
)
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: 0.01596436
##
## Based on 999 replicates
## Simulated p-value: 0.41
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 0.104039999 -0.006539113 0.046784121
Observation: 0.01440828
Simulated p-value (0.434) is not statistically signficant. Alternative
hypothesis: greater. The test was one-sided, checking if the observed
correlation is greater than what would be expected by chance. Std.Obs
Expectation Variance 0.049392821 0.003712072 0.046895623
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "simIBD_iberia_MAF1.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "Genetic_v_Geog_distance_iberia_MAF_1.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"output", "europe", "fst", "subsets", "IDB_PlotFromMASS_density_iberia_MAF1.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
png(here("output", "europe", "fst", "subsets", "ibd2_iberia_MAF1.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/subsets/pops_acg.txt \
--make-bed \
--out output/fst/subsets/mantel_acg \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_acg.log
20968 variants loaded from .bim file. –keep-fam: 59 people remaining. Total genotyping rate in remaining samples is 0.973622. 20968 variants and 59 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/subsets/mantel_acg \
--recodeA \
--out output/fst/subsets/mantel_acg \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_acg.log
20968 variants loaded from .bim file. 20968 variants and 59 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_acg.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
Load it
albo2 <- readRDS(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/agc_MAF1.rds"
))
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_acg.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 TIR 193 0 0 0 -9
## 2 TIR 194 0 0 0 -9
## 3 TIR 195 0 0 0 -9
## 4 TIR 196 0 0 0 -9
## 5 GRC 2202 0 0 1 -9
## 6 CRO 711 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 TIR 193 0 0 0 -9 Tirana Albania
## 2 TIR 194 0 0 0 -9 Tirana Albania
## 3 TIR 195 0 0 0 -9 Tirana Albania
## 4 TIR 196 0 0 0 -9 Tirana Albania
## 5 GRC 2202 0 0 1 -9 Chania Greece
## 6 CRO 711 0 0 0 -9 Dubrovnik Croatia
## Latitude Longitude Continent Year Region Subregion order order2
## 1 41.31473 19.83172 Europe 2017 Southern Europe East Europe 34 26
## 2 41.31473 19.83172 Europe 2017 Southern Europe East Europe 34 26
## 3 41.31473 19.83172 Europe 2017 Southern Europe East Europe 34 26
## 4 41.31473 19.83172 Europe 2017 Southern Europe East Europe 34 26
## 5 35.51448 24.01796 Europe 2019 Southern Europe East Europe 37 29
## 6 42.60654 18.22661 Europe 2017 Southern Europe East Europe 31 23
## orderold
## 1 26
## 2 26
## 3 26
## 4 26
## 5 29
## 6 23
Get the latitude and longitude
## [,1] [,2]
## [1,] 41.31473 19.83172
## [2,] 41.31473 19.83172
## [3,] 41.31473 19.83172
## [4,] 41.31473 19.83172
## [5,] 35.51448 24.01796
## [6,] 42.60654 18.22661
Add jitter
## x y
## [1,] 41.31548 19.83117
## [2,] 41.31652 19.83194
## [3,] 41.31406 19.83051
## [4,] 41.31448 19.83003
## [5,] 35.51458 24.01847
## [6,] 42.60811 18.22540
Add to object
Save
saveRDS(
albo2,
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/acg_albo2.rds"
)
)
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: -0.4976328
##
## Based on 999 replicates
## Simulated p-value: 0.985
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -1.699080884 -0.001331756 0.085322243
Observation: -0.4978599
Simulated p-value (0.991) is not statistically signficant. Alternative
hypothesis: greater. The test was one-sided, checking if the observed
correlation is greater than what would be expected by chance. Std.Obs
Expectation Variance -1.688754683 0.007006367 0.089375735
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "simIBD_acg_MAF1.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "Genetic_v_Geog_distance_acg_MAF_1.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"output", "europe", "fst", "subsets", "IDB_PlotFromMASS_density_acg_MAF1.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
png(here("output", "europe", "fst", "subsets", "ibd2_acg_MAF1.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/subsets/pops_italy.txt \
--make-bed \
--out output/fst/subsets/mantel_italy \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_italy.log
20968 variants loaded from .bim file. –keep-fam: 97 people remaining. Total genotyping rate in remaining samples is 0.959471. 20968 variants and 97 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/subsets/mantel_italy \
--recodeA \
--out output/fst/subsets/mantel_italy \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_italy.log
20968 variants loaded from .bim file. 20968 variants and 97 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_italy.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
saveRDS(albo2, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/italy_MAF1.rds"
))
Load it
albo2 <- readRDS(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/italy_MAF1.rds"
))
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_italy.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 TRE 1185 0 0 0 -9
## 2 TRE 1186 0 0 0 -9
## 3 TRE 1187 0 0 0 -9
## 4 TRE 1188 0 0 0 -9
## 5 TRE 1189 0 0 2 -9
## 6 TRE 1190 0 0 2 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 TRE 1185 0 0 0 -9 Trentino Italy
## 2 TRE 1186 0 0 0 -9 Trentino Italy
## 3 TRE 1187 0 0 0 -9 Trentino Italy
## 4 TRE 1188 0 0 0 -9 Trentino Italy
## 5 TRE 1189 0 0 2 -9 Trentino Italy
## 6 TRE 1190 0 0 2 -9 Trentino Italy
## Latitude Longitude Continent Year Region Subregion order order2
## 1 46.05917 11.11722 Europe 2020 Southern Europe West Europe 22 14
## 2 46.05917 11.11722 Europe 2020 Southern Europe West Europe 22 14
## 3 46.05917 11.11722 Europe 2020 Southern Europe West Europe 22 14
## 4 46.05917 11.11722 Europe 2020 Southern Europe West Europe 22 14
## 5 46.05917 11.11722 Europe 2020 Southern Europe West Europe 22 14
## 6 46.05917 11.11722 Europe 2020 Southern Europe West Europe 22 14
## orderold
## 1 14
## 2 14
## 3 14
## 4 14
## 5 14
## 6 14
Get the latitude and longitude
## [,1] [,2]
## [1,] 46.05917 11.11722
## [2,] 46.05917 11.11722
## [3,] 46.05917 11.11722
## [4,] 46.05917 11.11722
## [5,] 46.05917 11.11722
## [6,] 46.05917 11.11722
Add jitter
## x y
## [1,] 46.05617 11.10367
## [2,] 46.04763 11.11973
## [3,] 46.05780 11.12035
## [4,] 46.05396 11.10678
## [5,] 46.05544 11.11029
## [6,] 46.07178 11.10345
Add to object
Save
saveRDS(
albo2,
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/italy_albo2.rds"
)
)
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: 0.03952138
##
## Based on 999 replicates
## Simulated p-value: 0.456
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 0.12647946 0.01022908 0.05363730
Observation: 0.03899139
Simulated p-value (0.431) is not statistically signficant. Alternative
hypothesis: greater. The test was one-sided, checking if the observed
correlation is greater than what would be expected by chance. Std.Obs
Expectation Variance 0.161017859 0.001150481 0.055229986
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "simIBD_italy_MAF1.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "Genetic_v_Geog_distance_italy_MAF_1.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"output", "europe", "fst", "subsets", "IDB_PlotFromMASS_density_italy_MAF1.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
png(here("output", "europe", "fst", "subsets", "ibd2_italy_MAF1.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/subsets/pops_east.txt \
--make-bed \
--out output/fst/subsets/mantel_east \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_east.log
20968 variants loaded from .bim file. –keep-fam: 106 people remaining. Total genotyping rate in remaining samples is 0.976328. 20968 variants and 106 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/subsets/mantel_east \
--recodeA \
--out output/fst/subsets/mantel_east \
--silent;
grep 'samples\|variants\|remaining' output/fst/subsets/mantel_east.log
20968 variants loaded from .bim file. 20968 variants and 106 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_east.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
Load it
albo2 <- readRDS(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/east_MAF1.rds"
))
Import Sample locations
## Pop_City Country Latitude Longitude Continent Abbreviation Year
## 1 Berlin, NJ USA 39.79081 -74.9291 Americas BER 2018
## 2 Columbus, OH USA 39.97170 -82.9071 Americas COL 2015
## 3 Palm Beach USA 26.70560 -80.0364 Americas PAL 2018
## 4 Houston, TX USA 29.75491 -95.3505 Americas HOU 2018
## 5 Los Angeles USA 34.05220 -118.2437 Americas LOS 2018
## 6 Manaus, AM Brazil -3.09161 -60.0325 Americas MAU 2017
## Region Subregion order order2 orderold
## 1 North America 1 NA 75
## 2 North America 2 NA 76
## 3 North America 3 NA 77
## 4 North America 4 NA 78
## 5 North America 5 NA 79
## 6 South America 6 NA 80
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/mantel_east.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 SOC 1065 0 0 0 -9 Sochi Russia
## 2 SOC 1066 0 0 0 -9 Sochi Russia
## 3 SOC 1067 0 0 0 -9 Sochi Russia
## 4 SOC 1068 0 0 0 -9 Sochi Russia
## 5 SOC 1069 0 0 0 -9 Sochi Russia
## 6 SOC 1070 0 0 0 -9 Sochi Russia
## Latitude Longitude Continent Year Region Subregion order order2
## 1 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 2 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 3 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 4 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 5 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## 6 43.60042 39.74533 Europe 2021 Eastern Europe East Europe 46 38
## orderold
## 1 38
## 2 38
## 3 38
## 4 38
## 5 38
## 6 38
Get the latitude and longitude
## [,1] [,2]
## [1,] 43.60042 39.74533
## [2,] 43.60042 39.74533
## [3,] 43.60042 39.74533
## [4,] 43.60042 39.74533
## [5,] 43.60042 39.74533
## [6,] 43.60042 39.74533
Add jitter
## x y
## [1,] 43.60005 39.74616
## [2,] 43.60138 39.74376
## [3,] 43.60235 39.74629
## [4,] 43.60164 39.74395
## [5,] 43.60035 39.74344
## [6,] 43.60231 39.74547
Add to object
Save
saveRDS(
albo2,
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/subsets/east_albo2.rds"
)
)
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: 0.149561
##
## Based on 999 replicates
## Simulated p-value: 0.296
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## 0.591579962 -0.004727425 0.068020487
Observation: 0.1495636
Simulated p-value (0.325) is not statistically signficant. Alternative
hypothesis: greater. The test was one-sided, checking if the observed
correlation is greater than what would be expected by chance. Std.Obs
Expectation Variance 0.55289454 0.00886304 0.06476002
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "simIBD_east_MAF1.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"output", "europe", "fst", "subsets", "Genetic_v_Geog_distance_east_MAF_1.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"output", "europe", "fst", "subsets", "IDB_PlotFromMASS_density_east_MAF1_equ.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
png(here("output", "europe", "fst", "subsets", "ibd2_east_MAF1.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 6350564 339.2 19014132 1015.5 23767665 1269.4
## Vcells 14639377 111.7 198256644 1512.6 309601531 2362.1
Import the data
Set 1
Set1 <- readRDS(
here(
"output", "europe", "fst", "r_01", "LD2_country_europe_r01.rds"
)
)
head(Set1)
## # A tibble: 6 × 2
## Country mean_fst
## <chr> <dbl>
## 1 Albania 0.1
## 2 Armenia 0.08
## 3 Bulgaria 0.1
## 4 Croatia 0.09
## 5 France 0.08
## 6 Georgia 0.11
Set 2
## # A tibble: 6 × 2
## Country mean_fst
## <chr> <dbl>
## 1 Albania 0.1
## 2 Armenia 0.08
## 3 Bulgaria 0.1
## 4 Croatia 0.09
## 5 France 0.08
## 6 Georgia 0.11
Set 3
## # A tibble: 6 × 2
## Country mean_fst
## <chr> <dbl>
## 1 Albania 0.1
## 2 Armenia 0.08
## 3 Bulgaria 0.1
## 4 Croatia 0.09
## 5 France 0.08
## 6 Georgia 0.11
Merge the datasets
# Add an identifier column to each dataset
Set1$dataset <- "Set1"
Set2$dataset <- "Set2"
Set3$dataset <- "Set3"
# Merge the datasets
combined_data <- rbind(Set1, Set2, Set3)
# Assign a numeric index for plotting based on ordered countries
combined_data <- combined_data %>%
arrange(Country) %>%
group_by(Country) %>%
mutate(index = dense_rank(Country))
# Custom color mapping
color_mapping <- scale_color_manual(
values = c(Set1 = "green3", Set2 = "blue", Set3 = "orange"),
name = "Dataset"
)
# Shapes
shape_mapping <- scale_shape_manual(
values = c(Set1 = 16, Set2 = 17, Set3 = 18),
name = "Dataset"
)
# Scatter plot with custom colors and shapes
plot <- ggplot(combined_data, aes(x = Country, y = mean_fst, color = dataset, shape = dataset)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = FALSE, aes(group = dataset)) + # Fitted line per dataset
labs(
title = "",
x = "Countries",
y = "Mean Fst"
) +
color_mapping +
shape_mapping +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "top")
# Display the plot
print(plot)
## `geom_smooth()` using formula = 'y ~ x'
Create list SNP pops with at least 4 individuals shared with microsats
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
awk '{print $1}' output/snps_sets/r2_0.01_b.fam | sort | uniq -c | awk '{print $2, $1}' | awk '$2 >= 4 {print}' | awk '{print $1}' > output/fst/pops_4bfst.txt;
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
head output/fst/pops_4bfst.txt;
wc -l output/fst/pops_4bfst.txt
## ALD
## ALU
## ALV
## ARM
## BAR
## BRE
## BUL
## CES
## CRO
## DES
## 40 output/fst/pops_4bfst.txt
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
head output/fst/pops_4bfst_overlap.txt;
wc output/fst/pops_4bfst_overlap.txt
## ALD
## BAR
## BUL
## CRO
## FRS
## GES
## GRA
## GRC
## ITB
## ITP
## 25 25 125 output/fst/pops_4bfst_overlap.txt
We have 25 populations with both SNP & microsat data
First load plink
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/pops_4bfst_overlap.txt \
--recodeA \
--out output/fst/overlap \
--silent;
grep 'samples\|variants\|remaining' output/fst/overlap.log
20968 variants loaded from .bim file. –keep-fam: 242 people remaining. Total genotyping rate in remaining samples is 0.973943. 20968 variants and 242 people pass filters and QC.
Look at https://rdrr.io/cran/StAMPP/man/stamppFst.html for details of Fst estimations
LD2 <-
read.PLINK(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/overlap.raw"
),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Length Class Mode
## 1 genlight S4
This chunk will take a couple minutes to run.
# run stampp. If you want to run with bootstraps and nclusters use the HPC. It will run out of memory on a 32Gb laptop
LD2_3 <- stamppFst(LD2_2, 1, 95, 1)
Save it
saveRDS(
LD2_3, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/LD2_MAF1_overlap.rds"
)
)
To load it
LD2_3 <- readRDS(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_MAF1_overlap.rds"
)
)
Now lets look at the object
## SOC GES STS ITR
## Min. :0.01127 Min. :0.09666 Min. :0.03010 Min. :0.01703
## 1st Qu.:0.10175 1st Qu.:0.10817 1st Qu.:0.04447 1st Qu.:0.03965
## Median :0.11561 Median :0.12145 Median :0.06819 Median :0.05552
## Mean :0.11638 Mean :0.12689 Mean :0.06935 Mean :0.05977
## 3rd Qu.:0.13102 3rd Qu.:0.13765 3rd Qu.:0.07530 3rd Qu.:0.07254
## Max. :0.22672 Max. :0.23290 Max. :0.17530 Max. :0.15959
## NA's :1 NA's :2 NA's :3 NA's :4
## GRC BAR BUL CRO
## Min. :0.02418 Min. :0.04376 Min. :0.05680 Min. :0.04540
## 1st Qu.:0.06578 1st Qu.:0.08255 1st Qu.:0.06811 1st Qu.:0.05812
## Median :0.07974 Median :0.10140 Median :0.09262 Median :0.07012
## Mean :0.08455 Mean :0.10467 Mean :0.09302 Mean :0.08014
## 3rd Qu.:0.09938 3rd Qu.:0.12147 3rd Qu.:0.10293 3rd Qu.:0.09397
## Max. :0.18931 Max. :0.21778 Max. :0.19614 Max. :0.17329
## NA's :5 NA's :6 NA's :7 NA's :8
## GRA ITB MAL SPM
## Min. :0.05215 Min. :0.05193 Min. :0.02244 Min. :0.006262
## 1st Qu.:0.06488 1st Qu.:0.06287 1st Qu.:0.03897 1st Qu.:0.052607
## Median :0.07909 Median :0.08386 Median :0.06256 Median :0.066148
## Mean :0.08549 Mean :0.09413 Mean :0.06264 Mean :0.077446
## 3rd Qu.:0.09821 3rd Qu.:0.10322 3rd Qu.:0.07392 3rd Qu.:0.093330
## Max. :0.18592 Max. :0.21971 Max. :0.16130 Max. :0.213896
## NA's :9 NA's :10 NA's :11 NA's :12
## TUA TUH ALD FRS
## Min. :0.02844 Min. :0.03796 Min. :0.07934 Min. :0.03937
## 1st Qu.:0.05808 1st Qu.:0.04592 1st Qu.:0.09148 1st Qu.:0.04284
## Median :0.06820 Median :0.07927 Median :0.11690 Median :0.06241
## Mean :0.08408 Mean :0.07715 Mean :0.11822 Mean :0.07281
## 3rd Qu.:0.10014 3rd Qu.:0.08713 3rd Qu.:0.12858 3rd Qu.:0.07965
## Max. :0.19864 Max. :0.17311 Max. :0.21576 Max. :0.16404
## NA's :13 NA's :14 NA's :15 NA's :16
## ITP POP ROS SER
## Min. :0.03615 Min. :0.03586 Min. :0.05906 Min. :0.1529
## 1st Qu.:0.05294 1st Qu.:0.05241 1st Qu.:0.09236 1st Qu.:0.1877
## Median :0.07859 Median :0.06361 Median :0.09956 Median :0.2100
## Mean :0.08048 Mean :0.07861 Mean :0.11744 Mean :0.2029
## 3rd Qu.:0.08405 3rd Qu.:0.08702 3rd Qu.:0.13313 3rd Qu.:0.2252
## Max. :0.17461 Max. :0.16508 Max. :0.20308 Max. :0.2386
## NA's :17 NA's :18 NA's :19 NA's :20
## SLO SPC SPB SPS
## Min. :0.05438 Min. :0.007113 Min. :0.1281 Min. : NA
## 1st Qu.:0.05841 1st Qu.:0.039494 1st Qu.:0.1281 1st Qu.: NA
## Median :0.06244 Median :0.071875 Median :0.1281 Median : NA
## Mean :0.06970 Mean :0.071875 Mean :0.1281 Mean :NaN
## 3rd Qu.:0.07737 3rd Qu.:0.104256 3rd Qu.:0.1281 3rd Qu.: NA
## Max. :0.09230 Max. :0.136637 Max. :0.1281 Max. : NA
## NA's :21 NA's :22 NA's :23 NA's :24
If you want you can save the fst values as csv.
# Save it
write.csv(LD2_df, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_df_01_overlap.csv"))
Check the Fst values
## SOC GES STS ITR GRC BAR BUL CRO GRA ITB
## SOC NA NA NA NA NA NA NA NA NA NA
## GES 0.01126978 NA NA NA NA NA NA NA NA NA
## STS 0.10591967 0.1092791 NA NA NA NA NA NA NA NA
## ITR 0.09551819 0.1005738 0.03982659 NA NA NA NA NA NA NA
## GRC 0.11671692 0.1220816 0.07434949 0.05710568 NA NA NA NA NA NA
## BAR 0.13602044 0.1427050 0.09268239 0.06549046 0.1009452 NA NA NA NA NA
## MAL SPM TUA TUH ALD FRS ITP POP ROS SER SLO SPC SPB SPS
## SOC NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## STS NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## ITR NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GRC NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## BAR NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Convert the data into a matrix.
## SOC GES STS ITR GRC BAR
## SOC NA 0.01126978 0.10591967 0.09551819 0.11671692 0.13602044
## GES 0.01126978 NA 0.10927914 0.10057375 0.12208159 0.14270495
## STS 0.10591967 0.10927914 NA 0.03982659 0.07434949 0.09268239
## ITR 0.09551819 0.10057375 0.03982659 NA 0.05710568 0.06549046
## GRC 0.11671692 0.12208159 0.07434949 0.05710568 NA 0.10094517
## BAR 0.13602044 0.14270495 0.09268239 0.06549046 0.10094517 NA
## BUL CRO GRA ITB MAL SPM
## SOC 0.12832355 0.11495017 0.10889177 0.13371682 0.09458982 0.12205750
## GES 0.13384104 0.12036716 0.11376844 0.13892193 0.10125102 0.12971049
## STS 0.07799253 0.06354859 0.07077473 0.07120783 0.03980952 0.06532393
## ITR 0.05392586 0.04949314 0.04798060 0.06838917 0.01702597 0.06977034
## GRC 0.09338818 0.05248230 0.02417956 0.09782363 0.05730552 0.09532555
## BAR 0.10885970 0.09393694 0.09225317 0.12946121 0.07823715 0.11048101
## TUA TUH ALD FRS ITP POP
## SOC 0.11826743 0.10435177 0.13932531 0.10279814 0.10070244 0.09884633
## GES 0.12401729 0.11137798 0.14452571 0.10779885 0.10598420 0.10461590
## STS 0.07272436 0.05322703 0.09733023 0.03009784 0.04109490 0.04320132
## ITR 0.03907520 0.03526920 0.08060398 0.03984728 0.02050277 0.03645268
## GRC 0.07973727 0.07020895 0.07388877 0.07086343 0.06244349 0.06910929
## BAR 0.06317696 0.04376254 0.12083140 0.08889532 0.08100898 0.08718046
## ROS SER SLO SPC SPB SPS
## SOC 0.13423764 0.2267217 0.09140753 0.12576249 0.11561471 0.15080720
## GES 0.13912010 0.2329000 0.09665973 0.13312607 0.12082033 0.15814054
## STS 0.07530389 0.1752962 0.04446887 0.06818588 0.06120598 0.09859755
## ITR 0.07758959 0.1595922 0.04133080 0.08683449 0.07820355 0.07085827
## GRC 0.10336915 0.1893113 0.05999989 0.10519477 0.09488816 0.10597149
## BAR 0.12469756 0.2177834 0.07855252 0.12926803 0.12167900 0.11394015
#sampling_loc <- read_csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_SNPs_fst.csv")
Save it
saveRDS(
sampling_loc, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"
)
)
Import sample locations
sampling_loc <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"))
# Arrange by region
sampling_loc <- sampling_loc |>
dplyr::arrange(
order
)
# Check it
head(sampling_loc)
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 1 more variable: order <dbl>
Order
## [1] "FRS" "STS" "POP" "SPB" "SPS" "SPC" "BAR" "SPM" "ITB" "ITR" "ITP" "MAL"
## [13] "SLO" "CRO" "ALD" "SER" "GRA" "GRC" "ROS" "BUL" "TUA" "TUH" "SOC" "GES"
Create vector with order of populations
# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df)
# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]
#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it
# Print the reordered populations
print(poporder)
## [1] "SOC" "GES" "STS" "ITR" "GRC" "BAR" "BUL" "CRO" "GRA" "ITB" "MAL" "SPM"
## [13] "TUA" "TUH" "ALD" "FRS" "ITP" "POP" "ROS" "SER" "SLO" "SPC" "SPB" "SPS"
Lets check if the matrix is symmetric.
## [1] TRUE
Order the matrix using poporder. We will also add NA on the upper left side of the matrix.
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## FRS : 24 FRS : 24 Min. :0.00626
## STS : 24 STS : 24 1st Qu.:0.06090
## POP : 24 POP : 24 Median :0.08701
## SPB : 24 SPB : 24 Mean :0.09085
## SPS : 24 SPS : 24 3rd Qu.:0.10948
## SPC : 24 SPC : 24 Max. :0.23859
## (Other):432 (Other):432 NA's :300
Now lets plot the data with ggplot. You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions. Now lets plot the data with ggplot. You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f <- ggplot(pairfst.long, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 3) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 0)
)
pairfst.f
Save it
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
# Step 2: Calculate mean Fst for each pair of countries
# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(LD2_df))
fst_df$Abbreviation1 <- rownames(fst_df)
# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)
# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))
# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>%
group_by(Country_1, Country_2) %>%
summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>%
filter(Country_1 != Country_2)
# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)
# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)
for(i in 1:nrow(fst_matrix_summary)) {
for(j in i:nrow(fst_matrix_summary)) {
if (i == j) {
symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
} else {
avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
symmetric_fst_matrix[i, j] <- avg_value
symmetric_fst_matrix[j, i] <- avg_value
}
}
}
# Check if the matrix is symmetric
2
## [1] 2
# print(isSymmetric(symmetric_fst_matrix))
# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
## Albania Bulgaria Croatia France Georgia Greece
## Albania NA 0.11279822 0.04539978 0.09472371 0.14452571 0.07649130
## Bulgaria 0.11279822 NA 0.08722725 0.07492144 0.13384104 0.09300545
## Croatia 0.04539978 0.08722725 NA 0.06302575 0.12036716 0.05544159
## France 0.09472371 0.07492144 0.06302575 NA 0.10853899 0.07064597
## Georgia 0.14452571 0.13384104 0.12036716 0.10853899 NA 0.11792501
## Greece 0.07649130 0.09300545 0.05544159 0.07064597 0.11792501 NA
## Italy 0.09154900 0.06636464 0.05965627 0.04890522 0.11515996 0.06500279
## Malta 0.08316641 0.05679897 0.05500955 0.03897441 0.10125102 0.05472746
## Portugal 0.09147718 0.06636208 0.06402375 0.04227483 0.10461590 0.06798589
## Romania 0.12354519 0.10293539 0.09925514 0.07490660 0.13912010 0.10150763
## Russia 0.13932531 0.12832355 0.11495017 0.10435891 0.01126978 0.11280435
## Serbia 0.21575958 0.19614135 0.17328819 0.16966823 0.23289996 0.18761510
## Slovenia 0.07933804 0.06530144 0.05729126 0.04390333 0.09665973 0.05726520
## Spain 0.12227116 0.10696192 0.09537205 0.07523979 0.13690047 0.09579872
## Turkey 0.09399600 0.07568412 0.07012296 0.05986011 0.11769763 0.07154177
## Italy Malta Portugal Romania Russia Serbia
## Albania 0.09154900 0.08316641 0.09147718 0.12354519 0.13932531 0.2157596
## Bulgaria 0.06636464 0.05679897 0.06636208 0.10293539 0.12832355 0.1961414
## Croatia 0.05965627 0.05500955 0.06402375 0.09925514 0.11495017 0.1732882
## France 0.04890522 0.03897441 0.04227483 0.07490660 0.10435891 0.1696682
## Georgia 0.11515996 0.10125102 0.10461590 0.13912010 0.01126978 0.2329000
## Greece 0.06500279 0.05472746 0.06798589 0.10150763 0.11280435 0.1876151
## Italy NA 0.03035905 0.04157277 0.08661136 0.10997915 0.1846366
## Malta 0.03035905 NA 0.02975032 0.07357926 0.09458982 0.1612972
## Portugal 0.04157277 0.02975032 NA 0.07259454 0.09884633 0.1650812
## Romania 0.08661136 0.07357926 0.07259454 NA 0.13423764 0.2030822
## Russia 0.10997915 0.09458982 0.09884633 0.13423764 NA 0.2267217
## Serbia 0.18463665 0.16129723 0.16508121 0.20308218 0.22672167 NA
## Slovenia 0.04478448 0.03916767 0.03586147 0.05906033 0.09140753 0.1528773
## Spain 0.08665648 0.07372547 0.06801861 0.10896586 0.13005247 0.2176911
## Turkey 0.05623848 0.04497299 0.05514368 0.09174178 0.11130960 0.1858761
## Slovenia Spain Turkey
## Albania 0.07933804 0.12227116 0.09399600
## Bulgaria 0.06530144 0.10696192 0.07568412
## Croatia 0.05729126 0.09537205 0.07012296
## France 0.04390333 0.07523979 0.05986011
## Georgia 0.09665973 0.13690047 0.11769763
## Greece 0.05726520 0.09579872 0.07154177
## Italy 0.04478448 0.08665648 0.05623848
## Malta 0.03916767 0.07372547 0.04497299
## Portugal 0.03586147 0.06801861 0.05514368
## Romania 0.05906033 0.10896586 0.09174178
## Russia 0.09140753 0.13005247 0.11130960
## Serbia 0.15287729 0.21769108 0.18587606
## Slovenia NA 0.06748962 0.05168875
## Spain 0.06748962 NA 0.07429569
## Turkey 0.05168875 0.07429569 NA
## Albania Bulgaria Croatia France Georgia Greece
## Albania NA 0.1127982 0.04539978 0.09472371 0.1445257 0.07649130
## Bulgaria NA NA 0.08722725 0.07492144 0.1338410 0.09300545
## Croatia NA NA NA 0.06302575 0.1203672 0.05544159
## France NA NA NA NA 0.1085390 0.07064597
## Georgia NA NA NA NA NA 0.11792501
## Greece NA NA NA NA NA NA
## Italy NA NA NA NA NA NA
## Malta NA NA NA NA NA NA
## Portugal NA NA NA NA NA NA
## Romania NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA
## Slovenia NA NA NA NA NA NA
## Spain NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Italy Malta Portugal Romania Russia Serbia
## Albania 0.09154900 0.08316641 0.09147718 0.12354519 0.13932531 0.2157596
## Bulgaria 0.06636464 0.05679897 0.06636208 0.10293539 0.12832355 0.1961414
## Croatia 0.05965627 0.05500955 0.06402375 0.09925514 0.11495017 0.1732882
## France 0.04890522 0.03897441 0.04227483 0.07490660 0.10435891 0.1696682
## Georgia 0.11515996 0.10125102 0.10461590 0.13912010 0.01126978 0.2329000
## Greece 0.06500279 0.05472746 0.06798589 0.10150763 0.11280435 0.1876151
## Italy NA 0.03035905 0.04157277 0.08661136 0.10997915 0.1846366
## Malta NA NA 0.02975032 0.07357926 0.09458982 0.1612972
## Portugal NA NA NA 0.07259454 0.09884633 0.1650812
## Romania NA NA NA NA 0.13423764 0.2030822
## Russia NA NA NA NA NA 0.2267217
## Serbia NA NA NA NA NA NA
## Slovenia NA NA NA NA NA NA
## Spain NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Slovenia Spain Turkey
## Albania 0.07933804 0.12227116 0.09399600
## Bulgaria 0.06530144 0.10696192 0.07568412
## Croatia 0.05729126 0.09537205 0.07012296
## France 0.04390333 0.07523979 0.05986011
## Georgia 0.09665973 0.13690047 0.11769763
## Greece 0.05726520 0.09579872 0.07154177
## Italy 0.04478448 0.08665648 0.05623848
## Malta 0.03916767 0.07372547 0.04497299
## Portugal 0.03586147 0.06801861 0.05514368
## Romania 0.05906033 0.10896586 0.09174178
## Russia 0.09140753 0.13005247 0.11130960
## Serbia 0.15287729 0.21769108 0.18587606
## Slovenia NA 0.06748962 0.05168875
## Spain NA NA 0.07429569
## Turkey NA NA NA
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## Albania : 15 Albania : 15 Min. :0.01127
## Bulgaria: 15 Bulgaria: 15 1st Qu.:0.06402
## Croatia : 15 Croatia : 15 Median :0.09148
## France : 15 France : 15 Mean :0.09696
## Georgia : 15 Georgia : 15 3rd Qu.:0.11770
## Greece : 15 Greece : 15 Max. :0.23290
## (Other) :135 (Other) :135 NA's :120
You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f2 <- ggplot(pairfst.long2, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 3) +
theme(
axis.text.x = element_text(angle = 90, hjust = 0),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 1)
)
pairfst.f2
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fst_matrix_europe_MAF01_by_country_overlap.pdf"),
pairfst.f2,
width = 6,
height = 5,
units = "in"
)
Remove NAs and rename columns
# remove NAs
fst2 <-
pairfst.long |>
drop_na()
# rename columns
fst2 <-
fst2 |>
dplyr::rename(pop1 = 1,
pop2 = 2,
fst = 3)
# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
dplyr::select(pop = pop2, fst)
# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)
# Calculate the mean fst for each population
mean_fst <- df_combined |>
group_by(pop) |>
summarise(mean_fst = mean(fst))
print(mean_fst)
## # A tibble: 24 × 2
## pop mean_fst
## <fct> <dbl>
## 1 FRS 0.0690
## 2 STS 0.0727
## 3 POP 0.0662
## 4 SPB 0.0861
## 5 SPS 0.112
## 6 SPC 0.0955
## 7 BAR 0.105
## 8 SPM 0.0846
## 9 ITB 0.0975
## 10 ITR 0.0622
## # ℹ 14 more rows
Merge
fst3 <-
sampling_loc |>
left_join(
mean_fst,
by = c("Abbreviation" = "pop")
) |>
drop_na()
# check output
head(fst3)
## # A tibble: 6 × 11
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 2 more variables: order <dbl>, mean_fst <dbl>
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Region) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Region")
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 12
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr>
## 1 Saint-Martin-… France 45.2 5.77 Europe FRS 2019 Weste…
## 2 Strasbourg France 48.6 7.75 Europe STS 2019 Weste…
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South…
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South…
## 5 San Roque Spain 36.2 -5.37 Europe SPS 2017 South…
## 6 Catarroja Spain 39.4 -0.396 Europe SPC 2017 South…
## 7 Barcelona Spain 41.4 2.17 Europe BAR 2018 South…
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South…
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South…
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South…
## # ℹ 14 more rows
## # ℹ 4 more variables: Marker <chr>, order <dbl>, mean_fst <dbl>,
## # mean_fst_by_region <dbl>
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 13
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr>
## 1 Saint-Martin-… France 45.2 5.77 Europe FRS 2019 Weste…
## 2 Strasbourg France 48.6 7.75 Europe STS 2019 Weste…
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South…
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South…
## 5 San Roque Spain 36.2 -5.37 Europe SPS 2017 South…
## 6 Catarroja Spain 39.4 -0.396 Europe SPC 2017 South…
## 7 Barcelona Spain 41.4 2.17 Europe BAR 2018 South…
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South…
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South…
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South…
## # ℹ 14 more rows
## # ℹ 5 more variables: Marker <chr>, order <dbl>, mean_fst <dbl>,
## # mean_fst_by_region <dbl>, mean_fst_by_country <dbl>
# Rename columns
fst3 <- fst3 |>
dplyr::rename(
City = Pop_City)
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 13
## City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasb… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafi… Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Ro… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarr… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## 7 Barcel… Spain 41.4 2.17 Europe BAR 2018 South… SNPs
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South… SNPs
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South… SNPs
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South… SNPs
## # ℹ 14 more rows
## # ℹ 4 more variables: order <dbl>, mean_fst <dbl>, mean_fst_by_region <dbl>,
## # mean_fst_by_country <dbl>
Mean By continent
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Continent) |>
summarize(mean_fst_by_continent = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Continent")
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 14
## City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasb… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafi… Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Ro… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarr… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## 7 Barcel… Spain 41.4 2.17 Europe BAR 2018 South… SNPs
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South… SNPs
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South… SNPs
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South… SNPs
## # ℹ 14 more rows
## # ℹ 5 more variables: order <dbl>, mean_fst <dbl>, mean_fst_by_region <dbl>,
## # mean_fst_by_country <dbl>, mean_fst_by_continent <dbl>
fst4 <- fst3 |>
dplyr::select(
Region, mean_fst_by_region, Country, mean_fst_by_country, City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Region, Country, City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## # A tibble: 6 × 7
## Region mean_fst_by_region Country mean_fst_by_country City Abbreviation
## <chr> <dbl> <chr> <dbl> <chr> <chr>
## 1 Eastern Eur… 0.11 Bulgar… 0.09 Lom BUL
## 2 Eastern Eur… 0.11 Georgia 0.12 Sakh… GES
## 3 Eastern Eur… 0.11 Romania 0.1 Satu… ROS
## 4 Eastern Eur… 0.11 Russia 0.12 Sochi SOC
## 5 Eastern Eur… 0.11 Serbia 0.19 Novi… SER
## 6 Eastern Eur… 0.11 Turkey 0.08 Alia… TUA
## # ℹ 1 more variable: mean_fst <dbl>
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for European SNPs overlapping with microsat dataset.",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Region | mean_fst_by_region | Country | mean_fst_by_country | City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|
Eastern Europe | 0.11 | Bulgaria | 0.09 | Lom | BUL | 0.09 |
Eastern Europe | 0.11 | Georgia | 0.12 | Sakhumi, Abkhazia | GES | 0.12 |
Eastern Europe | 0.11 | Romania | 0.10 | Satu Mare | ROS | 0.10 |
Eastern Europe | 0.11 | Russia | 0.12 | Sochi | SOC | 0.12 |
Eastern Europe | 0.11 | Serbia | 0.19 | Novi Sad | SER | 0.19 |
Eastern Europe | 0.11 | Turkey | 0.08 | Aliaga | TUA | 0.08 |
Eastern Europe | 0.11 | Turkey | 0.08 | Hopa | TUH | 0.07 |
Southern Europe | 0.08 | Albania | 0.11 | Durres | ALD | 0.11 |
Southern Europe | 0.08 | Croatia | 0.08 | Dubrovnik | CRO | 0.08 |
Southern Europe | 0.08 | Greece | 0.08 | Athens | GRA | 0.08 |
Southern Europe | 0.08 | Greece | 0.08 | Chania | GRC | 0.09 |
Southern Europe | 0.08 | Italy | 0.08 | Bologna | ITB | 0.10 |
Southern Europe | 0.08 | Italy | 0.08 | Puglia | ITP | 0.07 |
Southern Europe | 0.08 | Italy | 0.08 | Rome | ITR | 0.06 |
Southern Europe | 0.08 | Malta | 0.06 | Luqa | MAL | 0.06 |
Southern Europe | 0.08 | Portugal | 0.07 | Penafiel | POP | 0.07 |
Southern Europe | 0.08 | Slovenia | 0.06 | Ajdovscina | SLO | 0.06 |
Southern Europe | 0.08 | Spain | 0.10 | Badajoz | SPB | 0.09 |
Southern Europe | 0.08 | Spain | 0.10 | Barcelona | BAR | 0.11 |
Southern Europe | 0.08 | Spain | 0.10 | Catarroja | SPC | 0.10 |
Southern Europe | 0.08 | Spain | 0.10 | Magaluf | SPM | 0.08 |
Southern Europe | 0.08 | Spain | 0.10 | San Roque | SPS | 0.11 |
Western Europe | 0.07 | France | 0.07 | Saint-Martin-d'Heres | FRS | 0.07 |
Western Europe | 0.07 | France | 0.07 | Strasbourg | STS | 0.07 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fstTable_Europe_SNPs_MAF01_overlap.docx"
)
# Save the Word document
print(doc, target = output_path)
To make scatter plot
# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
dplyr::group_by(Country) |>
dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))
# save the data
saveRDS(aggregated_data, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_country_europe_MAF01_overlap.rds"
))
# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]
# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)
# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)
# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)
ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
geom_point(aes(color = Country), size = 3) +
geom_line(aes(y = fitted_values), color = "blue") + # Fitted line
labs(
title = "Mean Fst by Country",
x = "Ordered Countries",
y = "Mean Fst Value"
) +
scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "none")
Save it
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/mean_fst_by_country_SNPs_MAF01_overlap.pdf"),
width = 10,
height = 10,
units = "in"
)
Estimate distances
# Grab the population names from the matrix aa
populations_with_fst <- colnames(aa)
# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)
# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation
# Calculate the distances
for (i in 1:n) {
for (j in 1:n) {
if (i != j) {
coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
}
}
}
# Print the distance matrix
head(distance_matrix)
## FRS STS POP SPB SPS SPC BAR
## FRS 0.0000 412.1522 1225.4383 1263.7518 1371.4448 817.6702 511.8716
## STS 412.1522 0.0000 1509.1745 1601.1573 1750.4874 1213.5639 915.9614
## POP 1225.4383 1509.1745 0.0000 282.8409 614.5101 701.9619 878.2910
## SPB 1263.7518 1601.1573 282.8409 0.0000 331.7685 571.0509 827.0716
## SPS 1371.4448 1750.4874 614.5101 331.7685 0.0000 566.5153 874.4050
## SPC 817.6702 1213.5639 701.9619 571.0509 566.5153 0.0000 310.0074
## SPM ITB ITR ITP MAL SLO CRO ALD
## FRS 683.8845 448.0871 654.1062 1004.474 1269.904 628.7258 1037.845 1192.210
## STS 1095.6072 536.0256 834.5057 1098.126 1522.503 546.2343 1052.803 1230.239
## POP 939.4811 1644.8695 1734.4434 2102.566 2066.730 1851.8481 2197.100 2319.742
## SPB 822.8201 1643.6694 1683.0686 2040.301 1923.429 1869.2123 2157.626 2262.516
## SPS 787.3372 1690.3169 1669.1413 2002.327 1785.308 1929.6259 2144.409 2225.649
## SPC 251.7726 1124.6677 1124.2692 1474.434 1368.139 1363.2153 1600.981 1697.515
## SER GRA GRC ROS BUL TUA TUH SOC
## FRS 1100.2591 1709.174 1877.379 1341.427 1392.500 1884.818 2897.030 2688.509
## STS 988.6774 1763.684 1974.712 1124.852 1304.105 1885.544 2745.357 2511.584
## POP 2315.5228 2777.366 2876.039 2566.256 2591.385 3000.557 4103.736 3906.072
## SPB 2313.3538 2686.566 2760.206 2594.045 2569.099 2924.845 4076.402 3894.554
## SPS 2340.8167 2601.355 2642.916 2651.131 2568.416 2854.982 4053.790 3891.912
## SPC 1780.6744 2115.539 2193.176 2085.530 2019.617 2354.477 3518.971 3346.038
## GES
## FRS 2793.592
## STS 2619.862
## POP 4009.674
## SPB 3995.220
## SPS 3988.376
## SPC 3444.704
Compare distance and FST
# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]
# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0
# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))
data <- data |>
dplyr::arrange(
Distance
)
head(data)
## Distance FST row_index col_index
## 1 0 0 FRS FRS
## 2 0 0 STS STS
## 3 0 0 POP POP
## 4 0 0 SPB SPB
## 5 0 0 SPS SPS
## 6 0 0 SPC SPC
Fit linear regression
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fst_by_distance_SNPs_MAF01_overlap_with_equ.pdf"),
width = 6,
height = 4,
units = "in"
)
Without equation
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
# annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, #r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fst_by_distance_SNPs_MAF01_overlap.pdf"),
width = 6,
height = 4,
units = "in"
)
We can merge the FST and distance matrices
# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)
# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]
# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names
# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
# Upper triangular - Fst values with two decimal places
merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
} else if (i > j) {
# Lower triangular - Distance values with zero decimal places
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Now the merged_matrix should be formatted as you need
print(merged_matrix)
## ALD BAR BUL CRO FRS GES GRA GRC ITB ITP
## ALD NA "0.12" "0.11" "0.05" "0.09" "0.14" "0.08" "0.07" "0.12" "0.08"
## BAR "1446" NA "0.11" "0.09" "0.09" "0.14" "0.09" "0.10" "0.13" "0.08"
## BUL "414" "1742" NA "0.09" "0.07" "0.13" "0.09" "0.09" "0.10" "0.06"
## CRO "180" "1333" "428" NA "0.06" "0.12" "0.06" "0.05" "0.09" "0.05"
## FRS "1192" "512" "1393" "1038" NA "0.11" "0.07" "0.07" "0.06" "0.04"
## GES "1770" "3168" "1426" "1845" "2794" NA "0.11" "0.12" "0.14" "0.11"
## GRA "534" "1900" "656" "711" "1709" "1540" NA "0.02" "0.10" "0.05"
## GRC "754" "2007" "925" "934" "1877" "1675" "270" NA "0.10" "0.06"
## ITB "752" "824" "950" "591" "448" "2365" "1279" "1467" NA "0.07"
## ITP "224" "1227" "604" "201" "1004" "1991" "705" "884" "583" NA
## ITR "585" "862" "899" "477" "654" "2321" "1070" "1224" "302" "371"
## MAL "746" "1233" "1157" "817" "1270" "2395" "873" "862" "996" "621"
## POP "2320" "878" "2591" "2197" "1225" "4010" "2777" "2876" "1645" "2103"
## ROS "771" "1783" "445" "683" "1341" "1496" "1100" "1370" "961" "884"
## SER "442" "1489" "316" "322" "1100" "1695" "884" "1141" "672" "520"
## SLO "689" "1059" "782" "509" "629" "2166" "1220" "1440" "245" "585"
## SOC "1678" "3068" "1327" "1748" "2689" "109" "1470" "1618" "2262" "1898"
## SPB "2263" "827" "2569" "2158" "1264" "3995" "2687" "2760" "1644" "2040"
## SPC "1698" "310" "2020" "1601" "818" "3445" "2116" "2193" "1125" "1474"
## SPM "1450" "211" "1782" "1360" "684" "3204" "1864" "1944" "916" "1227"
## SPS "2226" "874" "2568" "2144" "1371" "3988" "2601" "2643" "1690" "2002"
## STS "1230" "916" "1304" "1053" "412" "2620" "1764" "1975" "536" "1098"
## TUA "694" "2123" "641" "851" "1885" "1265" "277" "445" "1441" "901"
## TUH "1828" "3251" "1512" "1919" "2897" "194" "1544" "1650" "2461" "2051"
## ITR MAL POP ROS SER SLO SOC SPB SPC SPM
## ALD "0.08" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BAR "0.07" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BUL "0.05" "0.06" "0.07" "0.10" "0.20" "0.07" "0.13" "0.10" "0.10" "0.10"
## CRO "0.05" "0.06" "0.06" "0.10" "0.17" "0.06" "0.11" "0.09" "0.10" "0.09"
## FRS "0.04" "0.04" "0.04" "0.07" "0.16" "0.04" "0.10" "0.06" "0.07" "0.06"
## GES "0.10" "0.10" "0.10" "0.14" "0.23" "0.10" "0.01" "0.12" "0.13" "0.13"
## GRA "0.05" "0.05" "0.07" "0.10" "0.19" "0.05" "0.11" "0.09" "0.10" "0.09"
## GRC "0.06" "0.06" "0.07" "0.10" "0.19" "0.06" "0.12" "0.09" "0.11" "0.10"
## ITB "0.07" "0.06" "0.05" "0.10" "0.22" "0.06" "0.13" "0.08" "0.09" "0.09"
## ITP "0.02" "0.02" "0.04" "0.08" "0.17" "0.04" "0.10" "0.07" "0.08" "0.06"
## ITR NA "0.02" "0.04" "0.08" "0.16" "0.04" "0.10" "0.08" "0.09" "0.07"
## MAL "694" NA "0.03" "0.07" "0.16" "0.04" "0.09" "0.07" "0.07" "0.06"
## POP "1734" "2067" NA "0.07" "0.17" "0.04" "0.10" "0.05" "0.05" "0.05"
## ROS "1047" "1498" "2566" NA "0.20" "0.06" "0.13" "0.09" "0.10" "0.09"
## SER "697" "1139" "2316" "367" NA "0.15" "0.23" "0.20" "0.22" "0.21"
## SLO "455" "1118" "1852" "726" "476" NA "0.09" "0.05" "0.06" "0.05"
## SOC "2224" "2316" "3906" "1388" "1591" "2060" NA "0.12" "0.13" "0.12"
## SPB "1683" "1923" "283" "2594" "2313" "1869" "3895" NA "0.01" "0.01"
## SPC "1124" "1368" "702" "2086" "1781" "1363" "3346" "571" NA "0.03"
## SPM "883" "1128" "939" "1873" "1555" "1160" "3107" "823" "252" NA
## SPS "1669" "1785" "615" "2651" "2341" "1930" "3892" "332" "567" "787"
## STS "835" "1523" "1509" "1125" "989" "546" "2512" "1601" "1214" "1096"
## TUA "1271" "1148" "3001" "1057" "932" "1340" "1198" "2925" "2354" "2103"
## TUH "2395" "2412" "4104" "1629" "1797" "2272" "283" "4076" "3519" "3275"
## SPS STS TUA TUH
## ALD "0.14" "0.10" "0.10" "0.09"
## BAR "0.11" "0.09" "0.06" "0.04"
## BUL "0.12" "0.08" "0.08" "0.07"
## CRO "0.11" "0.06" "0.07" "0.07"
## FRS "0.10" "0.03" "0.07" "0.05"
## GES "0.16" "0.11" "0.12" "0.11"
## GRA "0.10" "0.07" "0.07" "0.06"
## GRC "0.11" "0.07" "0.08" "0.07"
## ITB "0.13" "0.07" "0.10" "0.08"
## ITP "0.09" "0.04" "0.06" "0.04"
## ITR "0.07" "0.04" "0.04" "0.04"
## MAL "0.07" "0.04" "0.05" "0.04"
## POP "0.09" "0.04" "0.06" "0.05"
## ROS "0.13" "0.08" "0.10" "0.08"
## SER "0.24" "0.18" "0.20" "0.17"
## SLO "0.09" "0.04" "0.06" "0.04"
## SOC "0.15" "0.11" "0.12" "0.10"
## SPB "0.13" "0.06" "0.10" "0.08"
## SPC "0.14" "0.07" "0.11" "0.08"
## SPM "0.08" "0.07" "0.09" "0.07"
## SPS NA "0.10" "0.03" "0.09"
## STS "1750" NA "0.07" "0.05"
## TUA "2855" "1886" NA "0.05"
## TUH "4054" "2745" "1267" NA
cities <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"))
cities <- as_tibble(cities)
head(cities)
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 1 more variable: order <dbl>
We can sort by distance
# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)
# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))
# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]
# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
} else if (i > j) {
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Print the merged matrix
print(merged_matrix)
## CRO ITP ITR ALD ITB SLO SER BUL FRS MAL
## CRO NA "0.05" "0.05" "0.05" "0.09" "0.06" "0.17" "0.09" "0.06" "0.06"
## ITP "201" NA "0.02" "0.08" "0.07" "0.04" "0.17" "0.06" "0.04" "0.02"
## ITR "477" "371" NA "0.08" "0.07" "0.04" "0.16" "0.05" "0.04" "0.02"
## ALD "180" "224" "585" NA "0.12" "0.08" "0.22" "0.11" "0.09" "0.08"
## ITB "591" "583" "302" "752" NA "0.06" "0.22" "0.10" "0.06" "0.06"
## SLO "509" "585" "455" "689" "245" NA "0.15" "0.07" "0.04" "0.04"
## SER "322" "520" "697" "442" "672" "476" NA "0.20" "0.16" "0.16"
## BUL "428" "604" "899" "414" "950" "782" "316" NA "0.07" "0.06"
## FRS "1038" "1004" "654" "1192" "448" "629" "1100" "1393" NA "0.04"
## MAL "817" "621" "694" "746" "996" "1118" "1139" "1157" "1270" NA
## ROS "683" "884" "1047" "771" "961" "726" "367" "445" "1341" "1498"
## GRA "711" "705" "1070" "534" "1279" "1220" "884" "656" "1709" "873"
## STS "1053" "1098" "835" "1230" "536" "546" "989" "1304" "412" "1523"
## BAR "1333" "1227" "862" "1446" "824" "1059" "1489" "1742" "512" "1233"
## SPM "1360" "1227" "883" "1450" "916" "1160" "1555" "1782" "684" "1128"
## TUA "851" "901" "1271" "694" "1441" "1340" "932" "641" "1885" "1148"
## GRC "934" "884" "1224" "754" "1467" "1440" "1141" "925" "1877" "862"
## SPC "1601" "1474" "1124" "1698" "1125" "1363" "1781" "2020" "818" "1368"
## SPB "2158" "2040" "1683" "2263" "1644" "1869" "2313" "2569" "1264" "1923"
## SPS "2144" "2002" "1669" "2226" "1690" "1930" "2341" "2568" "1371" "1785"
## POP "2197" "2103" "1734" "2320" "1645" "1852" "2316" "2591" "1225" "2067"
## SOC "1748" "1898" "2224" "1678" "2262" "2060" "1591" "1327" "2689" "2316"
## GES "1845" "1991" "2321" "1770" "2365" "2166" "1695" "1426" "2794" "2395"
## TUH "1919" "2051" "2395" "1828" "2461" "2272" "1797" "1512" "2897" "2412"
## ROS GRA STS BAR SPM TUA GRC SPC SPB SPS
## CRO "0.10" "0.06" "0.06" "0.09" "0.09" "0.07" "0.05" "0.10" "0.09" "0.11"
## ITP "0.08" "0.05" "0.04" "0.08" "0.06" "0.06" "0.06" "0.08" "0.07" "0.09"
## ITR "0.08" "0.05" "0.04" "0.07" "0.07" "0.04" "0.06" "0.09" "0.08" "0.07"
## ALD "0.12" "0.08" "0.10" "0.12" "0.11" "0.10" "0.07" "0.13" "0.12" "0.14"
## ITB "0.10" "0.10" "0.07" "0.13" "0.09" "0.10" "0.10" "0.09" "0.08" "0.13"
## SLO "0.06" "0.05" "0.04" "0.08" "0.05" "0.06" "0.06" "0.06" "0.05" "0.09"
## SER "0.20" "0.19" "0.18" "0.22" "0.21" "0.20" "0.19" "0.22" "0.20" "0.24"
## BUL "0.10" "0.09" "0.08" "0.11" "0.10" "0.08" "0.09" "0.10" "0.10" "0.12"
## FRS "0.07" "0.07" "0.03" "0.09" "0.06" "0.07" "0.07" "0.07" "0.06" "0.10"
## MAL "0.07" "0.05" "0.04" "0.08" "0.06" "0.05" "0.06" "0.07" "0.07" "0.07"
## ROS NA "0.10" "0.08" "0.12" "0.09" "0.10" "0.10" "0.10" "0.09" "0.13"
## GRA "1100" NA "0.07" "0.09" "0.09" "0.07" "0.02" "0.10" "0.09" "0.10"
## STS "1125" "1764" NA "0.09" "0.07" "0.07" "0.07" "0.07" "0.06" "0.10"
## BAR "1783" "1900" "916" NA "0.11" "0.06" "0.10" "0.13" "0.12" "0.11"
## SPM "1873" "1864" "1096" "211" NA "0.09" "0.10" "0.03" "0.01" "0.08"
## TUA "1057" "277" "1886" "2123" "2103" NA "0.08" "0.11" "0.10" "0.03"
## GRC "1370" "270" "1975" "2007" "1944" "445" NA "0.11" "0.09" "0.11"
## SPC "2086" "2116" "1214" "310" "252" "2354" "2193" NA "0.01" "0.14"
## SPB "2594" "2687" "1601" "827" "823" "2925" "2760" "571" NA "0.13"
## SPS "2651" "2601" "1750" "874" "787" "2855" "2643" "567" "332" NA
## POP "2566" "2777" "1509" "878" "939" "3001" "2876" "702" "283" "615"
## SOC "1388" "1470" "2512" "3068" "3107" "1198" "1618" "3346" "3895" "3892"
## GES "1496" "1540" "2620" "3168" "3204" "1265" "1675" "3445" "3995" "3988"
## TUH "1629" "1544" "2745" "3251" "3275" "1267" "1650" "3519" "4076" "4054"
## POP SOC GES TUH
## CRO "0.06" "0.11" "0.12" "0.07"
## ITP "0.04" "0.10" "0.11" "0.04"
## ITR "0.04" "0.10" "0.10" "0.04"
## ALD "0.09" "0.14" "0.14" "0.09"
## ITB "0.05" "0.13" "0.14" "0.08"
## SLO "0.04" "0.09" "0.10" "0.04"
## SER "0.17" "0.23" "0.23" "0.17"
## BUL "0.07" "0.13" "0.13" "0.07"
## FRS "0.04" "0.10" "0.11" "0.05"
## MAL "0.03" "0.09" "0.10" "0.04"
## ROS "0.07" "0.13" "0.14" "0.08"
## GRA "0.07" "0.11" "0.11" "0.06"
## STS "0.04" "0.11" "0.11" "0.05"
## BAR "0.09" "0.14" "0.14" "0.04"
## SPM "0.05" "0.12" "0.13" "0.07"
## TUA "0.06" "0.12" "0.12" "0.05"
## GRC "0.07" "0.12" "0.12" "0.07"
## SPC "0.05" "0.13" "0.13" "0.08"
## SPB "0.05" "0.12" "0.12" "0.08"
## SPS "0.09" "0.15" "0.16" "0.09"
## POP NA "0.10" "0.10" "0.05"
## SOC "3906" NA "0.01" "0.10"
## GES "4010" "109" NA "0.11"
## TUH "4104" "283" "194" NA
Make a table and save it as a word document
# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)
# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]
merged_df1 <- as.data.frame(merged_df)
write.csv(merged_df1, "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/merged_df_MAF01_overlap.csv")
# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df))
ft
Population | CRO | ITP | ITR | ALD | ITB | SLO | SER | BUL | FRS | MAL | ROS | GRA | STS | BAR | SPM | TUA | GRC | SPC | SPB | SPS | POP | SOC | GES | TUH |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRO | 0.05 | 0.05 | 0.05 | 0.09 | 0.06 | 0.17 | 0.09 | 0.06 | 0.06 | 0.10 | 0.06 | 0.06 | 0.09 | 0.09 | 0.07 | 0.05 | 0.10 | 0.09 | 0.11 | 0.06 | 0.11 | 0.12 | 0.07 | |
ITP | 201 | 0.02 | 0.08 | 0.07 | 0.04 | 0.17 | 0.06 | 0.04 | 0.02 | 0.08 | 0.05 | 0.04 | 0.08 | 0.06 | 0.06 | 0.06 | 0.08 | 0.07 | 0.09 | 0.04 | 0.10 | 0.11 | 0.04 | |
ITR | 477 | 371 | 0.08 | 0.07 | 0.04 | 0.16 | 0.05 | 0.04 | 0.02 | 0.08 | 0.05 | 0.04 | 0.07 | 0.07 | 0.04 | 0.06 | 0.09 | 0.08 | 0.07 | 0.04 | 0.10 | 0.10 | 0.04 | |
ALD | 180 | 224 | 585 | 0.12 | 0.08 | 0.22 | 0.11 | 0.09 | 0.08 | 0.12 | 0.08 | 0.10 | 0.12 | 0.11 | 0.10 | 0.07 | 0.13 | 0.12 | 0.14 | 0.09 | 0.14 | 0.14 | 0.09 | |
ITB | 591 | 583 | 302 | 752 | 0.06 | 0.22 | 0.10 | 0.06 | 0.06 | 0.10 | 0.10 | 0.07 | 0.13 | 0.09 | 0.10 | 0.10 | 0.09 | 0.08 | 0.13 | 0.05 | 0.13 | 0.14 | 0.08 | |
SLO | 509 | 585 | 455 | 689 | 245 | 0.15 | 0.07 | 0.04 | 0.04 | 0.06 | 0.05 | 0.04 | 0.08 | 0.05 | 0.06 | 0.06 | 0.06 | 0.05 | 0.09 | 0.04 | 0.09 | 0.10 | 0.04 | |
SER | 322 | 520 | 697 | 442 | 672 | 476 | 0.20 | 0.16 | 0.16 | 0.20 | 0.19 | 0.18 | 0.22 | 0.21 | 0.20 | 0.19 | 0.22 | 0.20 | 0.24 | 0.17 | 0.23 | 0.23 | 0.17 | |
BUL | 428 | 604 | 899 | 414 | 950 | 782 | 316 | 0.07 | 0.06 | 0.10 | 0.09 | 0.08 | 0.11 | 0.10 | 0.08 | 0.09 | 0.10 | 0.10 | 0.12 | 0.07 | 0.13 | 0.13 | 0.07 | |
FRS | 1038 | 1004 | 654 | 1192 | 448 | 629 | 1100 | 1393 | 0.04 | 0.07 | 0.07 | 0.03 | 0.09 | 0.06 | 0.07 | 0.07 | 0.07 | 0.06 | 0.10 | 0.04 | 0.10 | 0.11 | 0.05 | |
MAL | 817 | 621 | 694 | 746 | 996 | 1118 | 1139 | 1157 | 1270 | 0.07 | 0.05 | 0.04 | 0.08 | 0.06 | 0.05 | 0.06 | 0.07 | 0.07 | 0.07 | 0.03 | 0.09 | 0.10 | 0.04 | |
ROS | 683 | 884 | 1047 | 771 | 961 | 726 | 367 | 445 | 1341 | 1498 | 0.10 | 0.08 | 0.12 | 0.09 | 0.10 | 0.10 | 0.10 | 0.09 | 0.13 | 0.07 | 0.13 | 0.14 | 0.08 | |
GRA | 711 | 705 | 1070 | 534 | 1279 | 1220 | 884 | 656 | 1709 | 873 | 1100 | 0.07 | 0.09 | 0.09 | 0.07 | 0.02 | 0.10 | 0.09 | 0.10 | 0.07 | 0.11 | 0.11 | 0.06 | |
STS | 1053 | 1098 | 835 | 1230 | 536 | 546 | 989 | 1304 | 412 | 1523 | 1125 | 1764 | 0.09 | 0.07 | 0.07 | 0.07 | 0.07 | 0.06 | 0.10 | 0.04 | 0.11 | 0.11 | 0.05 | |
BAR | 1333 | 1227 | 862 | 1446 | 824 | 1059 | 1489 | 1742 | 512 | 1233 | 1783 | 1900 | 916 | 0.11 | 0.06 | 0.10 | 0.13 | 0.12 | 0.11 | 0.09 | 0.14 | 0.14 | 0.04 | |
SPM | 1360 | 1227 | 883 | 1450 | 916 | 1160 | 1555 | 1782 | 684 | 1128 | 1873 | 1864 | 1096 | 211 | 0.09 | 0.10 | 0.03 | 0.01 | 0.08 | 0.05 | 0.12 | 0.13 | 0.07 | |
TUA | 851 | 901 | 1271 | 694 | 1441 | 1340 | 932 | 641 | 1885 | 1148 | 1057 | 277 | 1886 | 2123 | 2103 | 0.08 | 0.11 | 0.10 | 0.03 | 0.06 | 0.12 | 0.12 | 0.05 | |
GRC | 934 | 884 | 1224 | 754 | 1467 | 1440 | 1141 | 925 | 1877 | 862 | 1370 | 270 | 1975 | 2007 | 1944 | 445 | 0.11 | 0.09 | 0.11 | 0.07 | 0.12 | 0.12 | 0.07 | |
SPC | 1601 | 1474 | 1124 | 1698 | 1125 | 1363 | 1781 | 2020 | 818 | 1368 | 2086 | 2116 | 1214 | 310 | 252 | 2354 | 2193 | 0.01 | 0.14 | 0.05 | 0.13 | 0.13 | 0.08 | |
SPB | 2158 | 2040 | 1683 | 2263 | 1644 | 1869 | 2313 | 2569 | 1264 | 1923 | 2594 | 2687 | 1601 | 827 | 823 | 2925 | 2760 | 571 | 0.13 | 0.05 | 0.12 | 0.12 | 0.08 | |
SPS | 2144 | 2002 | 1669 | 2226 | 1690 | 1930 | 2341 | 2568 | 1371 | 1785 | 2651 | 2601 | 1750 | 874 | 787 | 2855 | 2643 | 567 | 332 | 0.09 | 0.15 | 0.16 | 0.09 | |
POP | 2197 | 2103 | 1734 | 2320 | 1645 | 1852 | 2316 | 2591 | 1225 | 2067 | 2566 | 2777 | 1509 | 878 | 939 | 3001 | 2876 | 702 | 283 | 615 | 0.10 | 0.10 | 0.05 | |
SOC | 1748 | 1898 | 2224 | 1678 | 2262 | 2060 | 1591 | 1327 | 2689 | 2316 | 1388 | 1470 | 2512 | 3068 | 3107 | 1198 | 1618 | 3346 | 3895 | 3892 | 3906 | 0.01 | 0.10 | |
GES | 1845 | 1991 | 2321 | 1770 | 2365 | 2166 | 1695 | 1426 | 2794 | 2395 | 1496 | 1540 | 2620 | 3168 | 3204 | 1265 | 1675 | 3445 | 3995 | 3988 | 4010 | 109 | 0.11 | |
TUH | 1919 | 2051 | 2395 | 1828 | 2461 | 2272 | 1797 | 1512 | 2897 | 2412 | 1629 | 1544 | 2745 | 3251 | 3275 | 1267 | 1650 | 3519 | 4076 | 4054 | 4104 | 283 | 194 |
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/snps_sets/r2_0.01_b \
--keep-fam output/fst/pops_4bfst_overlap.txt \
--make-bed \
--out output/fst/mantel_overlap \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel_overlap.log
20968 variants loaded from .bim file. –keep-fam: 242 people remaining. Total genotyping rate in remaining samples is 0.973943. 20968 variants and 242 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/mantel_overlap \
--recodeA \
--out output/fst/mantel_overlap \
--silent;
grep 'samples\|variants\|remaining' output/fst/mantel_overlap.log
20968 variants loaded from .bim file. 20968 variants and 242 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/mantel_overlap.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
saveRDS(albo2, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/albo2_overlap_MAF01.rds"
))
Load it
albo2 <- readRDS(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/albo2_overlap_MAF01.rds"
))
Import Sample locations
sampling_loc <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"))
head(sampling_loc)
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 1 more variable: order <dbl>
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/mantel_overlap.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# # Replace the BEN a with BEN b (remember to never name samples with the same ID... I change it manually in the fam file.)
# fam_data <- fam_data %>%
# mutate(IndividualID = ifelse(FamilyID == "BEN" & IndividualID == "a", "b", IndividualID))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 SOC 1065 0 0 0 -9 Sochi Russia
## 2 SOC 1066 0 0 0 -9 Sochi Russia
## 3 SOC 1067 0 0 0 -9 Sochi Russia
## 4 SOC 1068 0 0 0 -9 Sochi Russia
## 5 SOC 1069 0 0 0 -9 Sochi Russia
## 6 SOC 1070 0 0 0 -9 Sochi Russia
## Latitude Longitude Continent Year Region Marker order
## 1 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 2 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 3 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 4 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 5 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 6 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
Get the latitude and longitude
## [,1] [,2]
## [1,] 43.60042 39.74533
## [2,] 43.60042 39.74533
## [3,] 43.60042 39.74533
## [4,] 43.60042 39.74533
## [5,] 43.60042 39.74533
## [6,] 43.60042 39.74533
Add jitter
## x y
## [1,] 43.59569 39.74613
## [2,] 43.59772 39.74774
## [3,] 43.60103 39.74744
## [4,] 43.60969 39.74969
## [5,] 43.59586 39.75527
## [6,] 43.59112 39.75388
Add to object
Save
saveRDS(
albo2,
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/albo2_SNP_overlap_MAF1.rds"
)
)
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: -0.09969185
##
## Based on 999 replicates
## Simulated p-value: 0.76
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.749420367 -0.001071686 0.017317312
Monte-Carlo test Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
Observation: 0.08178366 Based on 999 replicates Simulated p-value: 0.225 Alternative hypothesis: greater
Std.Obs Expectation Variance
0.68997045 -0.00749107 0.01674157
Simulated p-value is not statistically signficant (0.225). Alternative hypothesis: greater. The test was one-sided, checking if the observed correlation is greater than what would be expected by chance.
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/simIBD_SNPs_MAF01_overlap.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/Genetic_v_Geog_distance_SNPs_MAF01_overlap.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Save Plot without equation
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/Genetic_v_Geog_distance_SNPs_MAF01_overlap_noequ.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/IDB_PlotFromMASS_density_SNPs_MAF01_overlap.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
# CairoPDF(here("output", "fst", "ibd.pdf"),
# width = 5,
# height = 4)
# png(here("output", "fst", "ibd2.png"),
# width = 5,
# height = 4,
# units='in',
# res = 300)
#myPal <-
# colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
Save plot with equation
png(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/IDB_PlotFromMASS_density_SNPs_MAF01_overlap_equation.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
#The fam file is the same for both data sets with the default or new priors
fam1 <-
read.delim(
file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/r2_0.01_b.fam"),
header = FALSE,
)
head(fam1)
## V1 V2 V3 V4 V5 V6
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Create list SNP pops with at least 4 individuals shared with microsats
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
awk '{print $1}' output/snps_sets/r2_0.01_b.fam | sort | uniq -c | awk '{print $2, $1}' | awk '$2 >= 4 {print}' | awk '{print $1}' > output/fst/pops_4bfst.txt;
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
head output/fst/pops_4bfst.txt;
wc -l output/fst/pops_4bfst.txt
## ALD
## ALU
## ALV
## ARM
## BAR
## BRE
## BUL
## CES
## CRO
## DES
## 40 output/fst/pops_4bfst.txt
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
head output/fst/pops_4bfst_overlap2.txt;
wc output/fst/pops_4bfst_overlap2.txt
## ALD
## BAR
## BUL
## CRO
## FRS
## GES
## GRA
## GRC
## ITB
## ITP
## 24 24 120 output/fst/pops_4bfst_overlap2.txt
Now we have 24 populations with both SNP & microsat data (with at least 4 individuals)
First load plink
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/overlap/r2_0.01_b \
--keep-fam output/fst/pops_4bfst_overlap2.txt \
--recodeA \
--out output/fst/overlap/overlap \
--silent;
grep 'samples\|variants\|remaining' output/fst/overlap/overlap.log
20968 variants loaded from .bim file. –keep-fam: 242 people remaining. Total genotyping rate in remaining samples is 0.973943. 20968 variants and 242 people pass filters and QC.
Look at https://rdrr.io/cran/StAMPP/man/stamppFst.html for details of Fst estimations
LD2 <-
read.PLINK(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/overlap.raw"
),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
##
## Reading PLINK raw format into a genlight object...
##
##
## Reading loci information...
##
## Reading and converting genotypes...
## .
## Building final object...
##
## ...done.
## Length Class Mode
## 1 genlight S4
This chunk will take a couple minutes to run.
# run stampp. If you want to run with bootstraps and nclusters use the HPC. It will run out of memory on a 32Gb laptop
LD2_3 <- stamppFst(LD2_2, 1, 95, 1)
Save it
saveRDS(
LD2_3, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_MAF1_overlap.rds"
)
)
To load it
LD2_3 <- readRDS(
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_MAF1_overlap.rds"
)
)
Now lets look at the object
## SOC GES STS ITR
## Min. :0.01127 Min. :0.09666 Min. :0.03010 Min. :0.01703
## 1st Qu.:0.10175 1st Qu.:0.10817 1st Qu.:0.04447 1st Qu.:0.03965
## Median :0.11561 Median :0.12145 Median :0.06819 Median :0.05552
## Mean :0.11638 Mean :0.12689 Mean :0.06935 Mean :0.05977
## 3rd Qu.:0.13102 3rd Qu.:0.13765 3rd Qu.:0.07530 3rd Qu.:0.07254
## Max. :0.22672 Max. :0.23290 Max. :0.17530 Max. :0.15959
## NA's :1 NA's :2 NA's :3 NA's :4
## GRC BAR BUL CRO
## Min. :0.02418 Min. :0.04376 Min. :0.05680 Min. :0.04540
## 1st Qu.:0.06578 1st Qu.:0.08255 1st Qu.:0.06811 1st Qu.:0.05812
## Median :0.07974 Median :0.10140 Median :0.09262 Median :0.07012
## Mean :0.08455 Mean :0.10467 Mean :0.09302 Mean :0.08014
## 3rd Qu.:0.09938 3rd Qu.:0.12147 3rd Qu.:0.10293 3rd Qu.:0.09397
## Max. :0.18931 Max. :0.21778 Max. :0.19614 Max. :0.17329
## NA's :5 NA's :6 NA's :7 NA's :8
## GRA ITB MAL SPM
## Min. :0.05215 Min. :0.05193 Min. :0.02244 Min. :0.006262
## 1st Qu.:0.06488 1st Qu.:0.06287 1st Qu.:0.03897 1st Qu.:0.052607
## Median :0.07909 Median :0.08386 Median :0.06256 Median :0.066148
## Mean :0.08549 Mean :0.09413 Mean :0.06264 Mean :0.077446
## 3rd Qu.:0.09821 3rd Qu.:0.10322 3rd Qu.:0.07392 3rd Qu.:0.093330
## Max. :0.18592 Max. :0.21971 Max. :0.16130 Max. :0.213896
## NA's :9 NA's :10 NA's :11 NA's :12
## TUA TUH ALD FRS
## Min. :0.02844 Min. :0.03796 Min. :0.07934 Min. :0.03937
## 1st Qu.:0.05808 1st Qu.:0.04592 1st Qu.:0.09148 1st Qu.:0.04284
## Median :0.06820 Median :0.07927 Median :0.11690 Median :0.06241
## Mean :0.08408 Mean :0.07715 Mean :0.11822 Mean :0.07281
## 3rd Qu.:0.10014 3rd Qu.:0.08713 3rd Qu.:0.12858 3rd Qu.:0.07965
## Max. :0.19864 Max. :0.17311 Max. :0.21576 Max. :0.16404
## NA's :13 NA's :14 NA's :15 NA's :16
## ITP POP ROS SER
## Min. :0.03615 Min. :0.03586 Min. :0.05906 Min. :0.1529
## 1st Qu.:0.05294 1st Qu.:0.05241 1st Qu.:0.09236 1st Qu.:0.1877
## Median :0.07859 Median :0.06361 Median :0.09956 Median :0.2100
## Mean :0.08048 Mean :0.07861 Mean :0.11744 Mean :0.2029
## 3rd Qu.:0.08405 3rd Qu.:0.08702 3rd Qu.:0.13313 3rd Qu.:0.2252
## Max. :0.17461 Max. :0.16508 Max. :0.20308 Max. :0.2386
## NA's :17 NA's :18 NA's :19 NA's :20
## SLO SPC SPB SPS
## Min. :0.05438 Min. :0.007113 Min. :0.1281 Min. : NA
## 1st Qu.:0.05841 1st Qu.:0.039494 1st Qu.:0.1281 1st Qu.: NA
## Median :0.06244 Median :0.071875 Median :0.1281 Median : NA
## Mean :0.06970 Mean :0.071875 Mean :0.1281 Mean :NaN
## 3rd Qu.:0.07737 3rd Qu.:0.104256 3rd Qu.:0.1281 3rd Qu.: NA
## Max. :0.09230 Max. :0.136637 Max. :0.1281 Max. : NA
## NA's :21 NA's :22 NA's :23 NA's :24
If you want you can save the fst values as csv.
# Save it
write.csv(LD2_df, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_df_01_overlap.csv"))
Check the Fst values
## SOC GES STS ITR GRC BAR BUL CRO GRA ITB
## SOC NA NA NA NA NA NA NA NA NA NA
## GES 0.01126978 NA NA NA NA NA NA NA NA NA
## STS 0.10591967 0.1092791 NA NA NA NA NA NA NA NA
## ITR 0.09551819 0.1005738 0.03982659 NA NA NA NA NA NA NA
## GRC 0.11671692 0.1220816 0.07434949 0.05710568 NA NA NA NA NA NA
## BAR 0.13602044 0.1427050 0.09268239 0.06549046 0.1009452 NA NA NA NA NA
## MAL SPM TUA TUH ALD FRS ITP POP ROS SER SLO SPC SPB SPS
## SOC NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GES NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## STS NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## ITR NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## GRC NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## BAR NA NA NA NA NA NA NA NA NA NA NA NA NA NA
Convert the data into a matrix.
## SOC GES STS ITR GRC BAR
## SOC NA 0.01126978 0.10591967 0.09551819 0.11671692 0.13602044
## GES 0.01126978 NA 0.10927914 0.10057375 0.12208159 0.14270495
## STS 0.10591967 0.10927914 NA 0.03982659 0.07434949 0.09268239
## ITR 0.09551819 0.10057375 0.03982659 NA 0.05710568 0.06549046
## GRC 0.11671692 0.12208159 0.07434949 0.05710568 NA 0.10094517
## BAR 0.13602044 0.14270495 0.09268239 0.06549046 0.10094517 NA
## BUL CRO GRA ITB MAL SPM
## SOC 0.12832355 0.11495017 0.10889177 0.13371682 0.09458982 0.12205750
## GES 0.13384104 0.12036716 0.11376844 0.13892193 0.10125102 0.12971049
## STS 0.07799253 0.06354859 0.07077473 0.07120783 0.03980952 0.06532393
## ITR 0.05392586 0.04949314 0.04798060 0.06838917 0.01702597 0.06977034
## GRC 0.09338818 0.05248230 0.02417956 0.09782363 0.05730552 0.09532555
## BAR 0.10885970 0.09393694 0.09225317 0.12946121 0.07823715 0.11048101
## TUA TUH ALD FRS ITP POP
## SOC 0.11826743 0.10435177 0.13932531 0.10279814 0.10070244 0.09884633
## GES 0.12401729 0.11137798 0.14452571 0.10779885 0.10598420 0.10461590
## STS 0.07272436 0.05322703 0.09733023 0.03009784 0.04109490 0.04320132
## ITR 0.03907520 0.03526920 0.08060398 0.03984728 0.02050277 0.03645268
## GRC 0.07973727 0.07020895 0.07388877 0.07086343 0.06244349 0.06910929
## BAR 0.06317696 0.04376254 0.12083140 0.08889532 0.08100898 0.08718046
## ROS SER SLO SPC SPB SPS
## SOC 0.13423764 0.2267217 0.09140753 0.12576249 0.11561471 0.15080720
## GES 0.13912010 0.2329000 0.09665973 0.13312607 0.12082033 0.15814054
## STS 0.07530389 0.1752962 0.04446887 0.06818588 0.06120598 0.09859755
## ITR 0.07758959 0.1595922 0.04133080 0.08683449 0.07820355 0.07085827
## GRC 0.10336915 0.1893113 0.05999989 0.10519477 0.09488816 0.10597149
## BAR 0.12469756 0.2177834 0.07855252 0.12926803 0.12167900 0.11394015
#sampling_loc <- read_csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_SNPs_fst.csv")
Save it
saveRDS(
sampling_loc, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"
)
)
Import sample locations
sampling_loc <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"))
# Arrange by region
sampling_loc <- sampling_loc |>
dplyr::arrange(
order
)
# Check it
head(sampling_loc)
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 1 more variable: order <dbl>
Order
## [1] "FRS" "STS" "POP" "SPB" "SPS" "SPC" "BAR" "SPM" "ITB" "ITR" "ITP" "MAL"
## [13] "SLO" "CRO" "ALD" "SER" "GRA" "GRC" "ROS" "BUL" "TUA" "TUH" "SOC" "GES"
Create vector with order of populations
# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df)
# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]
#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it
# Print the reordered populations
print(poporder)
## [1] "SOC" "GES" "STS" "ITR" "GRC" "BAR" "BUL" "CRO" "GRA" "ITB" "MAL" "SPM"
## [13] "TUA" "TUH" "ALD" "FRS" "ITP" "POP" "ROS" "SER" "SLO" "SPC" "SPB" "SPS"
Lets check if the matrix is symmetric.
## [1] TRUE
Order the matrix using poporder. We will also add NA on the upper left side of the matrix.
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## FRS : 24 FRS : 24 Min. :0.00626
## STS : 24 STS : 24 1st Qu.:0.06090
## POP : 24 POP : 24 Median :0.08701
## SPB : 24 SPB : 24 Mean :0.09085
## SPS : 24 SPS : 24 3rd Qu.:0.10948
## SPC : 24 SPC : 24 Max. :0.23859
## (Other):432 (Other):432 NA's :300
Now lets plot the data with ggplot. You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f <- ggplot(pairfst.long, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 3) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 0)
)
pairfst.f
Save it
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
# Step 2: Calculate mean Fst for each pair of countries
# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(LD2_df))
fst_df$Abbreviation1 <- rownames(fst_df)
# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)
# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))
# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>%
group_by(Country_1, Country_2) %>%
summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>%
filter(Country_1 != Country_2)
# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)
# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)
for(i in 1:nrow(fst_matrix_summary)) {
for(j in i:nrow(fst_matrix_summary)) {
if (i == j) {
symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
} else {
avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
symmetric_fst_matrix[i, j] <- avg_value
symmetric_fst_matrix[j, i] <- avg_value
}
}
}
# Check if the matrix is symmetric
2
## [1] 2
# print(isSymmetric(symmetric_fst_matrix))
# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
## Albania Bulgaria Croatia France Georgia Greece
## Albania NA 0.11279822 0.04539978 0.09472371 0.14452571 0.07649130
## Bulgaria 0.11279822 NA 0.08722725 0.07492144 0.13384104 0.09300545
## Croatia 0.04539978 0.08722725 NA 0.06302575 0.12036716 0.05544159
## France 0.09472371 0.07492144 0.06302575 NA 0.10853899 0.07064597
## Georgia 0.14452571 0.13384104 0.12036716 0.10853899 NA 0.11792501
## Greece 0.07649130 0.09300545 0.05544159 0.07064597 0.11792501 NA
## Italy 0.09154900 0.06636464 0.05965627 0.04890522 0.11515996 0.06500279
## Malta 0.08316641 0.05679897 0.05500955 0.03897441 0.10125102 0.05472746
## Portugal 0.09147718 0.06636208 0.06402375 0.04227483 0.10461590 0.06798589
## Romania 0.12354519 0.10293539 0.09925514 0.07490660 0.13912010 0.10150763
## Russia 0.13932531 0.12832355 0.11495017 0.10435891 0.01126978 0.11280435
## Serbia 0.21575958 0.19614135 0.17328819 0.16966823 0.23289996 0.18761510
## Slovenia 0.07933804 0.06530144 0.05729126 0.04390333 0.09665973 0.05726520
## Spain 0.12227116 0.10696192 0.09537205 0.07523979 0.13690047 0.09579872
## Turkey 0.09399600 0.07568412 0.07012296 0.05986011 0.11769763 0.07154177
## Italy Malta Portugal Romania Russia Serbia
## Albania 0.09154900 0.08316641 0.09147718 0.12354519 0.13932531 0.2157596
## Bulgaria 0.06636464 0.05679897 0.06636208 0.10293539 0.12832355 0.1961414
## Croatia 0.05965627 0.05500955 0.06402375 0.09925514 0.11495017 0.1732882
## France 0.04890522 0.03897441 0.04227483 0.07490660 0.10435891 0.1696682
## Georgia 0.11515996 0.10125102 0.10461590 0.13912010 0.01126978 0.2329000
## Greece 0.06500279 0.05472746 0.06798589 0.10150763 0.11280435 0.1876151
## Italy NA 0.03035905 0.04157277 0.08661136 0.10997915 0.1846366
## Malta 0.03035905 NA 0.02975032 0.07357926 0.09458982 0.1612972
## Portugal 0.04157277 0.02975032 NA 0.07259454 0.09884633 0.1650812
## Romania 0.08661136 0.07357926 0.07259454 NA 0.13423764 0.2030822
## Russia 0.10997915 0.09458982 0.09884633 0.13423764 NA 0.2267217
## Serbia 0.18463665 0.16129723 0.16508121 0.20308218 0.22672167 NA
## Slovenia 0.04478448 0.03916767 0.03586147 0.05906033 0.09140753 0.1528773
## Spain 0.08665648 0.07372547 0.06801861 0.10896586 0.13005247 0.2176911
## Turkey 0.05623848 0.04497299 0.05514368 0.09174178 0.11130960 0.1858761
## Slovenia Spain Turkey
## Albania 0.07933804 0.12227116 0.09399600
## Bulgaria 0.06530144 0.10696192 0.07568412
## Croatia 0.05729126 0.09537205 0.07012296
## France 0.04390333 0.07523979 0.05986011
## Georgia 0.09665973 0.13690047 0.11769763
## Greece 0.05726520 0.09579872 0.07154177
## Italy 0.04478448 0.08665648 0.05623848
## Malta 0.03916767 0.07372547 0.04497299
## Portugal 0.03586147 0.06801861 0.05514368
## Romania 0.05906033 0.10896586 0.09174178
## Russia 0.09140753 0.13005247 0.11130960
## Serbia 0.15287729 0.21769108 0.18587606
## Slovenia NA 0.06748962 0.05168875
## Spain 0.06748962 NA 0.07429569
## Turkey 0.05168875 0.07429569 NA
## Albania Bulgaria Croatia France Georgia Greece
## Albania NA 0.1127982 0.04539978 0.09472371 0.1445257 0.07649130
## Bulgaria NA NA 0.08722725 0.07492144 0.1338410 0.09300545
## Croatia NA NA NA 0.06302575 0.1203672 0.05544159
## France NA NA NA NA 0.1085390 0.07064597
## Georgia NA NA NA NA NA 0.11792501
## Greece NA NA NA NA NA NA
## Italy NA NA NA NA NA NA
## Malta NA NA NA NA NA NA
## Portugal NA NA NA NA NA NA
## Romania NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA
## Slovenia NA NA NA NA NA NA
## Spain NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Italy Malta Portugal Romania Russia Serbia
## Albania 0.09154900 0.08316641 0.09147718 0.12354519 0.13932531 0.2157596
## Bulgaria 0.06636464 0.05679897 0.06636208 0.10293539 0.12832355 0.1961414
## Croatia 0.05965627 0.05500955 0.06402375 0.09925514 0.11495017 0.1732882
## France 0.04890522 0.03897441 0.04227483 0.07490660 0.10435891 0.1696682
## Georgia 0.11515996 0.10125102 0.10461590 0.13912010 0.01126978 0.2329000
## Greece 0.06500279 0.05472746 0.06798589 0.10150763 0.11280435 0.1876151
## Italy NA 0.03035905 0.04157277 0.08661136 0.10997915 0.1846366
## Malta NA NA 0.02975032 0.07357926 0.09458982 0.1612972
## Portugal NA NA NA 0.07259454 0.09884633 0.1650812
## Romania NA NA NA NA 0.13423764 0.2030822
## Russia NA NA NA NA NA 0.2267217
## Serbia NA NA NA NA NA NA
## Slovenia NA NA NA NA NA NA
## Spain NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA
## Slovenia Spain Turkey
## Albania 0.07933804 0.12227116 0.09399600
## Bulgaria 0.06530144 0.10696192 0.07568412
## Croatia 0.05729126 0.09537205 0.07012296
## France 0.04390333 0.07523979 0.05986011
## Georgia 0.09665973 0.13690047 0.11769763
## Greece 0.05726520 0.09579872 0.07154177
## Italy 0.04478448 0.08665648 0.05623848
## Malta 0.03916767 0.07372547 0.04497299
## Portugal 0.03586147 0.06801861 0.05514368
## Romania 0.05906033 0.10896586 0.09174178
## Russia 0.09140753 0.13005247 0.11130960
## Serbia 0.15287729 0.21769108 0.18587606
## Slovenia NA 0.06748962 0.05168875
## Spain NA NA 0.07429569
## Turkey NA NA NA
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Var1 Var2 value
## Albania : 15 Albania : 15 Min. :0.01127
## Bulgaria: 15 Bulgaria: 15 1st Qu.:0.06402
## Croatia : 15 Croatia : 15 Median :0.09148
## France : 15 France : 15 Mean :0.09696
## Georgia : 15 Georgia : 15 3rd Qu.:0.11770
## Greece : 15 Greece : 15 Max. :0.23290
## (Other) :135 (Other) :135 NA's :120
You can click in the little square on the top left of the plot to open it on a new window. It will have the right proportions.
pairfst.f2 <- ggplot(pairfst.long2, aes(Var1, Var2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 3) +
theme(
axis.text.x = element_text(angle = 90, hjust = 0),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 1)
)
pairfst.f2
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fst_matrix_europe_MAF01_by_country_overlap.pdf"),
pairfst.f2,
width = 6,
height = 5,
units = "in"
)
Remove NAs and rename columns
# remove NAs
fst2 <-
pairfst.long |>
drop_na()
# rename columns
fst2 <-
fst2 |>
dplyr::rename(pop1 = 1,
pop2 = 2,
fst = 3)
# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
dplyr::select(pop = pop2, fst)
# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)
# Calculate the mean fst for each population
mean_fst <- df_combined |>
group_by(pop) |>
summarise(mean_fst = mean(fst))
print(mean_fst)
## # A tibble: 24 × 2
## pop mean_fst
## <fct> <dbl>
## 1 FRS 0.0690
## 2 STS 0.0727
## 3 POP 0.0662
## 4 SPB 0.0861
## 5 SPS 0.112
## 6 SPC 0.0955
## 7 BAR 0.105
## 8 SPM 0.0846
## 9 ITB 0.0975
## 10 ITR 0.0622
## # ℹ 14 more rows
Merge
fst3 <-
sampling_loc |>
left_join(
mean_fst,
by = c("Abbreviation" = "pop")
) |>
drop_na()
# check output
head(fst3)
## # A tibble: 6 × 11
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 2 more variables: order <dbl>, mean_fst <dbl>
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Region) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Region")
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 12
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr>
## 1 Saint-Martin-… France 45.2 5.77 Europe FRS 2019 Weste…
## 2 Strasbourg France 48.6 7.75 Europe STS 2019 Weste…
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South…
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South…
## 5 San Roque Spain 36.2 -5.37 Europe SPS 2017 South…
## 6 Catarroja Spain 39.4 -0.396 Europe SPC 2017 South…
## 7 Barcelona Spain 41.4 2.17 Europe BAR 2018 South…
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South…
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South…
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South…
## # ℹ 14 more rows
## # ℹ 4 more variables: Marker <chr>, order <dbl>, mean_fst <dbl>,
## # mean_fst_by_region <dbl>
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 13
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr>
## 1 Saint-Martin-… France 45.2 5.77 Europe FRS 2019 Weste…
## 2 Strasbourg France 48.6 7.75 Europe STS 2019 Weste…
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South…
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South…
## 5 San Roque Spain 36.2 -5.37 Europe SPS 2017 South…
## 6 Catarroja Spain 39.4 -0.396 Europe SPC 2017 South…
## 7 Barcelona Spain 41.4 2.17 Europe BAR 2018 South…
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South…
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South…
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South…
## # ℹ 14 more rows
## # ℹ 5 more variables: Marker <chr>, order <dbl>, mean_fst <dbl>,
## # mean_fst_by_region <dbl>, mean_fst_by_country <dbl>
# Rename columns
fst3 <- fst3 |>
dplyr::rename(
City = Pop_City)
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 13
## City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasb… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafi… Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Ro… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarr… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## 7 Barcel… Spain 41.4 2.17 Europe BAR 2018 South… SNPs
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South… SNPs
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South… SNPs
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South… SNPs
## # ℹ 14 more rows
## # ℹ 4 more variables: order <dbl>, mean_fst <dbl>, mean_fst_by_region <dbl>,
## # mean_fst_by_country <dbl>
Mean By continent
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Continent) |>
summarize(mean_fst_by_continent = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Continent")
# Print the modified fst3 tibble
print(fst3)
## # A tibble: 24 × 14
## City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasb… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafi… Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Ro… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarr… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## 7 Barcel… Spain 41.4 2.17 Europe BAR 2018 South… SNPs
## 8 Magaluf Spain 39.5 2.53 Europe SPM 2017 South… SNPs
## 9 Bologna Italy 44.5 11.4 Europe ITB 2017 South… SNPs
## 10 Rome Italy 41.9 12.5 Europe ITR 2013 South… SNPs
## # ℹ 14 more rows
## # ℹ 5 more variables: order <dbl>, mean_fst <dbl>, mean_fst_by_region <dbl>,
## # mean_fst_by_country <dbl>, mean_fst_by_continent <dbl>
fst4 <- fst3 |>
dplyr::select(
Region, mean_fst_by_region, Country, mean_fst_by_country, City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Region, Country, City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## # A tibble: 6 × 7
## Region mean_fst_by_region Country mean_fst_by_country City Abbreviation
## <chr> <dbl> <chr> <dbl> <chr> <chr>
## 1 Eastern Eur… 0.11 Bulgar… 0.09 Lom BUL
## 2 Eastern Eur… 0.11 Georgia 0.12 Sakh… GES
## 3 Eastern Eur… 0.11 Romania 0.1 Satu… ROS
## 4 Eastern Eur… 0.11 Russia 0.12 Sochi SOC
## 5 Eastern Eur… 0.11 Serbia 0.19 Novi… SER
## 6 Eastern Eur… 0.11 Turkey 0.08 Alia… TUA
## # ℹ 1 more variable: mean_fst <dbl>
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for European SNPs overlapping with microsat dataset.",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Region | mean_fst_by_region | Country | mean_fst_by_country | City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|
Eastern Europe | 0.11 | Bulgaria | 0.09 | Lom | BUL | 0.09 |
Eastern Europe | 0.11 | Georgia | 0.12 | Sakhumi, Abkhazia | GES | 0.12 |
Eastern Europe | 0.11 | Romania | 0.10 | Satu Mare | ROS | 0.10 |
Eastern Europe | 0.11 | Russia | 0.12 | Sochi | SOC | 0.12 |
Eastern Europe | 0.11 | Serbia | 0.19 | Novi Sad | SER | 0.19 |
Eastern Europe | 0.11 | Turkey | 0.08 | Aliaga | TUA | 0.08 |
Eastern Europe | 0.11 | Turkey | 0.08 | Hopa | TUH | 0.07 |
Southern Europe | 0.08 | Albania | 0.11 | Durres | ALD | 0.11 |
Southern Europe | 0.08 | Croatia | 0.08 | Dubrovnik | CRO | 0.08 |
Southern Europe | 0.08 | Greece | 0.08 | Athens | GRA | 0.08 |
Southern Europe | 0.08 | Greece | 0.08 | Chania | GRC | 0.09 |
Southern Europe | 0.08 | Italy | 0.08 | Bologna | ITB | 0.10 |
Southern Europe | 0.08 | Italy | 0.08 | Puglia | ITP | 0.07 |
Southern Europe | 0.08 | Italy | 0.08 | Rome | ITR | 0.06 |
Southern Europe | 0.08 | Malta | 0.06 | Luqa | MAL | 0.06 |
Southern Europe | 0.08 | Portugal | 0.07 | Penafiel | POP | 0.07 |
Southern Europe | 0.08 | Slovenia | 0.06 | Ajdovscina | SLO | 0.06 |
Southern Europe | 0.08 | Spain | 0.10 | Badajoz | SPB | 0.09 |
Southern Europe | 0.08 | Spain | 0.10 | Barcelona | BAR | 0.11 |
Southern Europe | 0.08 | Spain | 0.10 | Catarroja | SPC | 0.10 |
Southern Europe | 0.08 | Spain | 0.10 | Magaluf | SPM | 0.08 |
Southern Europe | 0.08 | Spain | 0.10 | San Roque | SPS | 0.11 |
Western Europe | 0.07 | France | 0.07 | Saint-Martin-d'Heres | FRS | 0.07 |
Western Europe | 0.07 | France | 0.07 | Strasbourg | STS | 0.07 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fstTable_Europe_SNPs_MAF01_overlap.docx"
)
# Save the Word document
print(doc, target = output_path)
To make scatter plot
# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
dplyr::group_by(Country) |>
dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))
# save the data
saveRDS(aggregated_data, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/LD2_country_europe_MAF01_overlap.rds"
))
# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]
# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)
# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)
# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)
ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
geom_point(aes(color = Country), size = 3) +
geom_line(aes(y = fitted_values), color = "blue") + # Fitted line
labs(
title = "Mean Fst by Country",
x = "Ordered Countries",
y = "Mean Fst Value"
) +
scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "none")
Save it
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/mean_fst_by_country_SNPs_MAF01_overlap.pdf"),
width = 10,
height = 10,
units = "in"
)
Estimate distances
# Grab the population names from the matrix aa
populations_with_fst <- colnames(aa)
# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)
# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation
# Calculate the distances
for (i in 1:n) {
for (j in 1:n) {
if (i != j) {
coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
}
}
}
# Print the distance matrix
head(distance_matrix)
## FRS STS POP SPB SPS SPC BAR
## FRS 0.0000 412.1522 1225.4383 1263.7518 1371.4448 817.6702 511.8716
## STS 412.1522 0.0000 1509.1745 1601.1573 1750.4874 1213.5639 915.9614
## POP 1225.4383 1509.1745 0.0000 282.8409 614.5101 701.9619 878.2910
## SPB 1263.7518 1601.1573 282.8409 0.0000 331.7685 571.0509 827.0716
## SPS 1371.4448 1750.4874 614.5101 331.7685 0.0000 566.5153 874.4050
## SPC 817.6702 1213.5639 701.9619 571.0509 566.5153 0.0000 310.0074
## SPM ITB ITR ITP MAL SLO CRO ALD
## FRS 683.8845 448.0871 654.1062 1004.474 1269.904 628.7258 1037.845 1192.210
## STS 1095.6072 536.0256 834.5057 1098.126 1522.503 546.2343 1052.803 1230.239
## POP 939.4811 1644.8695 1734.4434 2102.566 2066.730 1851.8481 2197.100 2319.742
## SPB 822.8201 1643.6694 1683.0686 2040.301 1923.429 1869.2123 2157.626 2262.516
## SPS 787.3372 1690.3169 1669.1413 2002.327 1785.308 1929.6259 2144.409 2225.649
## SPC 251.7726 1124.6677 1124.2692 1474.434 1368.139 1363.2153 1600.981 1697.515
## SER GRA GRC ROS BUL TUA TUH SOC
## FRS 1100.2591 1709.174 1877.379 1341.427 1392.500 1884.818 2897.030 2688.509
## STS 988.6774 1763.684 1974.712 1124.852 1304.105 1885.544 2745.357 2511.584
## POP 2315.5228 2777.366 2876.039 2566.256 2591.385 3000.557 4103.736 3906.072
## SPB 2313.3538 2686.566 2760.206 2594.045 2569.099 2924.845 4076.402 3894.554
## SPS 2340.8167 2601.355 2642.916 2651.131 2568.416 2854.982 4053.790 3891.912
## SPC 1780.6744 2115.539 2193.176 2085.530 2019.617 2354.477 3518.971 3346.038
## GES
## FRS 2793.592
## STS 2619.862
## POP 4009.674
## SPB 3995.220
## SPS 3988.376
## SPC 3444.704
Compare distance and FST
# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]
# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0
# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))
data <- data |>
dplyr::arrange(
Distance
)
head(data)
## Distance FST row_index col_index
## 1 0 0 FRS FRS
## 2 0 0 STS STS
## 3 0 0 POP POP
## 4 0 0 SPB SPB
## 5 0 0 SPS SPS
## 6 0 0 SPC SPC
Fit linear regression
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fst_by_distance_SNPs_MAF01_overlap_with_equ.pdf"),
width = 6,
height = 4,
units = "in"
)
Without equation
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
# annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, #r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/fst_by_distance_SNPs_MAF01_overlap.pdf"),
width = 6,
height = 4,
units = "in"
)
We can merge the FST and distance matrices
# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)
# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]
# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names
# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
# Upper triangular - Fst values with two decimal places
merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
} else if (i > j) {
# Lower triangular - Distance values with zero decimal places
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Now the merged_matrix should be formatted as you need
print(merged_matrix)
## ALD BAR BUL CRO FRS GES GRA GRC ITB ITP
## ALD NA "0.12" "0.11" "0.05" "0.09" "0.14" "0.08" "0.07" "0.12" "0.08"
## BAR "1446" NA "0.11" "0.09" "0.09" "0.14" "0.09" "0.10" "0.13" "0.08"
## BUL "414" "1742" NA "0.09" "0.07" "0.13" "0.09" "0.09" "0.10" "0.06"
## CRO "180" "1333" "428" NA "0.06" "0.12" "0.06" "0.05" "0.09" "0.05"
## FRS "1192" "512" "1393" "1038" NA "0.11" "0.07" "0.07" "0.06" "0.04"
## GES "1770" "3168" "1426" "1845" "2794" NA "0.11" "0.12" "0.14" "0.11"
## GRA "534" "1900" "656" "711" "1709" "1540" NA "0.02" "0.10" "0.05"
## GRC "754" "2007" "925" "934" "1877" "1675" "270" NA "0.10" "0.06"
## ITB "752" "824" "950" "591" "448" "2365" "1279" "1467" NA "0.07"
## ITP "224" "1227" "604" "201" "1004" "1991" "705" "884" "583" NA
## ITR "585" "862" "899" "477" "654" "2321" "1070" "1224" "302" "371"
## MAL "746" "1233" "1157" "817" "1270" "2395" "873" "862" "996" "621"
## POP "2320" "878" "2591" "2197" "1225" "4010" "2777" "2876" "1645" "2103"
## ROS "771" "1783" "445" "683" "1341" "1496" "1100" "1370" "961" "884"
## SER "442" "1489" "316" "322" "1100" "1695" "884" "1141" "672" "520"
## SLO "689" "1059" "782" "509" "629" "2166" "1220" "1440" "245" "585"
## SOC "1678" "3068" "1327" "1748" "2689" "109" "1470" "1618" "2262" "1898"
## SPB "2263" "827" "2569" "2158" "1264" "3995" "2687" "2760" "1644" "2040"
## SPC "1698" "310" "2020" "1601" "818" "3445" "2116" "2193" "1125" "1474"
## SPM "1450" "211" "1782" "1360" "684" "3204" "1864" "1944" "916" "1227"
## SPS "2226" "874" "2568" "2144" "1371" "3988" "2601" "2643" "1690" "2002"
## STS "1230" "916" "1304" "1053" "412" "2620" "1764" "1975" "536" "1098"
## TUA "694" "2123" "641" "851" "1885" "1265" "277" "445" "1441" "901"
## TUH "1828" "3251" "1512" "1919" "2897" "194" "1544" "1650" "2461" "2051"
## ITR MAL POP ROS SER SLO SOC SPB SPC SPM
## ALD "0.08" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BAR "0.07" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BUL "0.05" "0.06" "0.07" "0.10" "0.20" "0.07" "0.13" "0.10" "0.10" "0.10"
## CRO "0.05" "0.06" "0.06" "0.10" "0.17" "0.06" "0.11" "0.09" "0.10" "0.09"
## FRS "0.04" "0.04" "0.04" "0.07" "0.16" "0.04" "0.10" "0.06" "0.07" "0.06"
## GES "0.10" "0.10" "0.10" "0.14" "0.23" "0.10" "0.01" "0.12" "0.13" "0.13"
## GRA "0.05" "0.05" "0.07" "0.10" "0.19" "0.05" "0.11" "0.09" "0.10" "0.09"
## GRC "0.06" "0.06" "0.07" "0.10" "0.19" "0.06" "0.12" "0.09" "0.11" "0.10"
## ITB "0.07" "0.06" "0.05" "0.10" "0.22" "0.06" "0.13" "0.08" "0.09" "0.09"
## ITP "0.02" "0.02" "0.04" "0.08" "0.17" "0.04" "0.10" "0.07" "0.08" "0.06"
## ITR NA "0.02" "0.04" "0.08" "0.16" "0.04" "0.10" "0.08" "0.09" "0.07"
## MAL "694" NA "0.03" "0.07" "0.16" "0.04" "0.09" "0.07" "0.07" "0.06"
## POP "1734" "2067" NA "0.07" "0.17" "0.04" "0.10" "0.05" "0.05" "0.05"
## ROS "1047" "1498" "2566" NA "0.20" "0.06" "0.13" "0.09" "0.10" "0.09"
## SER "697" "1139" "2316" "367" NA "0.15" "0.23" "0.20" "0.22" "0.21"
## SLO "455" "1118" "1852" "726" "476" NA "0.09" "0.05" "0.06" "0.05"
## SOC "2224" "2316" "3906" "1388" "1591" "2060" NA "0.12" "0.13" "0.12"
## SPB "1683" "1923" "283" "2594" "2313" "1869" "3895" NA "0.01" "0.01"
## SPC "1124" "1368" "702" "2086" "1781" "1363" "3346" "571" NA "0.03"
## SPM "883" "1128" "939" "1873" "1555" "1160" "3107" "823" "252" NA
## SPS "1669" "1785" "615" "2651" "2341" "1930" "3892" "332" "567" "787"
## STS "835" "1523" "1509" "1125" "989" "546" "2512" "1601" "1214" "1096"
## TUA "1271" "1148" "3001" "1057" "932" "1340" "1198" "2925" "2354" "2103"
## TUH "2395" "2412" "4104" "1629" "1797" "2272" "283" "4076" "3519" "3275"
## SPS STS TUA TUH
## ALD "0.14" "0.10" "0.10" "0.09"
## BAR "0.11" "0.09" "0.06" "0.04"
## BUL "0.12" "0.08" "0.08" "0.07"
## CRO "0.11" "0.06" "0.07" "0.07"
## FRS "0.10" "0.03" "0.07" "0.05"
## GES "0.16" "0.11" "0.12" "0.11"
## GRA "0.10" "0.07" "0.07" "0.06"
## GRC "0.11" "0.07" "0.08" "0.07"
## ITB "0.13" "0.07" "0.10" "0.08"
## ITP "0.09" "0.04" "0.06" "0.04"
## ITR "0.07" "0.04" "0.04" "0.04"
## MAL "0.07" "0.04" "0.05" "0.04"
## POP "0.09" "0.04" "0.06" "0.05"
## ROS "0.13" "0.08" "0.10" "0.08"
## SER "0.24" "0.18" "0.20" "0.17"
## SLO "0.09" "0.04" "0.06" "0.04"
## SOC "0.15" "0.11" "0.12" "0.10"
## SPB "0.13" "0.06" "0.10" "0.08"
## SPC "0.14" "0.07" "0.11" "0.08"
## SPM "0.08" "0.07" "0.09" "0.07"
## SPS NA "0.10" "0.03" "0.09"
## STS "1750" NA "0.07" "0.05"
## TUA "2855" "1886" NA "0.05"
## TUH "4054" "2745" "1267" NA
cities <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"))
cities <- as_tibble(cities)
head(cities)
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 1 more variable: order <dbl>
We can sort by distance
# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)
# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))
# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]
# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
} else if (i > j) {
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Print the merged matrix
print(merged_matrix)
## CRO ITP ITR ALD ITB SLO SER BUL FRS MAL
## CRO NA "0.05" "0.05" "0.05" "0.09" "0.06" "0.17" "0.09" "0.06" "0.06"
## ITP "201" NA "0.02" "0.08" "0.07" "0.04" "0.17" "0.06" "0.04" "0.02"
## ITR "477" "371" NA "0.08" "0.07" "0.04" "0.16" "0.05" "0.04" "0.02"
## ALD "180" "224" "585" NA "0.12" "0.08" "0.22" "0.11" "0.09" "0.08"
## ITB "591" "583" "302" "752" NA "0.06" "0.22" "0.10" "0.06" "0.06"
## SLO "509" "585" "455" "689" "245" NA "0.15" "0.07" "0.04" "0.04"
## SER "322" "520" "697" "442" "672" "476" NA "0.20" "0.16" "0.16"
## BUL "428" "604" "899" "414" "950" "782" "316" NA "0.07" "0.06"
## FRS "1038" "1004" "654" "1192" "448" "629" "1100" "1393" NA "0.04"
## MAL "817" "621" "694" "746" "996" "1118" "1139" "1157" "1270" NA
## ROS "683" "884" "1047" "771" "961" "726" "367" "445" "1341" "1498"
## GRA "711" "705" "1070" "534" "1279" "1220" "884" "656" "1709" "873"
## STS "1053" "1098" "835" "1230" "536" "546" "989" "1304" "412" "1523"
## BAR "1333" "1227" "862" "1446" "824" "1059" "1489" "1742" "512" "1233"
## SPM "1360" "1227" "883" "1450" "916" "1160" "1555" "1782" "684" "1128"
## TUA "851" "901" "1271" "694" "1441" "1340" "932" "641" "1885" "1148"
## GRC "934" "884" "1224" "754" "1467" "1440" "1141" "925" "1877" "862"
## SPC "1601" "1474" "1124" "1698" "1125" "1363" "1781" "2020" "818" "1368"
## SPB "2158" "2040" "1683" "2263" "1644" "1869" "2313" "2569" "1264" "1923"
## SPS "2144" "2002" "1669" "2226" "1690" "1930" "2341" "2568" "1371" "1785"
## POP "2197" "2103" "1734" "2320" "1645" "1852" "2316" "2591" "1225" "2067"
## SOC "1748" "1898" "2224" "1678" "2262" "2060" "1591" "1327" "2689" "2316"
## GES "1845" "1991" "2321" "1770" "2365" "2166" "1695" "1426" "2794" "2395"
## TUH "1919" "2051" "2395" "1828" "2461" "2272" "1797" "1512" "2897" "2412"
## ROS GRA STS BAR SPM TUA GRC SPC SPB SPS
## CRO "0.10" "0.06" "0.06" "0.09" "0.09" "0.07" "0.05" "0.10" "0.09" "0.11"
## ITP "0.08" "0.05" "0.04" "0.08" "0.06" "0.06" "0.06" "0.08" "0.07" "0.09"
## ITR "0.08" "0.05" "0.04" "0.07" "0.07" "0.04" "0.06" "0.09" "0.08" "0.07"
## ALD "0.12" "0.08" "0.10" "0.12" "0.11" "0.10" "0.07" "0.13" "0.12" "0.14"
## ITB "0.10" "0.10" "0.07" "0.13" "0.09" "0.10" "0.10" "0.09" "0.08" "0.13"
## SLO "0.06" "0.05" "0.04" "0.08" "0.05" "0.06" "0.06" "0.06" "0.05" "0.09"
## SER "0.20" "0.19" "0.18" "0.22" "0.21" "0.20" "0.19" "0.22" "0.20" "0.24"
## BUL "0.10" "0.09" "0.08" "0.11" "0.10" "0.08" "0.09" "0.10" "0.10" "0.12"
## FRS "0.07" "0.07" "0.03" "0.09" "0.06" "0.07" "0.07" "0.07" "0.06" "0.10"
## MAL "0.07" "0.05" "0.04" "0.08" "0.06" "0.05" "0.06" "0.07" "0.07" "0.07"
## ROS NA "0.10" "0.08" "0.12" "0.09" "0.10" "0.10" "0.10" "0.09" "0.13"
## GRA "1100" NA "0.07" "0.09" "0.09" "0.07" "0.02" "0.10" "0.09" "0.10"
## STS "1125" "1764" NA "0.09" "0.07" "0.07" "0.07" "0.07" "0.06" "0.10"
## BAR "1783" "1900" "916" NA "0.11" "0.06" "0.10" "0.13" "0.12" "0.11"
## SPM "1873" "1864" "1096" "211" NA "0.09" "0.10" "0.03" "0.01" "0.08"
## TUA "1057" "277" "1886" "2123" "2103" NA "0.08" "0.11" "0.10" "0.03"
## GRC "1370" "270" "1975" "2007" "1944" "445" NA "0.11" "0.09" "0.11"
## SPC "2086" "2116" "1214" "310" "252" "2354" "2193" NA "0.01" "0.14"
## SPB "2594" "2687" "1601" "827" "823" "2925" "2760" "571" NA "0.13"
## SPS "2651" "2601" "1750" "874" "787" "2855" "2643" "567" "332" NA
## POP "2566" "2777" "1509" "878" "939" "3001" "2876" "702" "283" "615"
## SOC "1388" "1470" "2512" "3068" "3107" "1198" "1618" "3346" "3895" "3892"
## GES "1496" "1540" "2620" "3168" "3204" "1265" "1675" "3445" "3995" "3988"
## TUH "1629" "1544" "2745" "3251" "3275" "1267" "1650" "3519" "4076" "4054"
## POP SOC GES TUH
## CRO "0.06" "0.11" "0.12" "0.07"
## ITP "0.04" "0.10" "0.11" "0.04"
## ITR "0.04" "0.10" "0.10" "0.04"
## ALD "0.09" "0.14" "0.14" "0.09"
## ITB "0.05" "0.13" "0.14" "0.08"
## SLO "0.04" "0.09" "0.10" "0.04"
## SER "0.17" "0.23" "0.23" "0.17"
## BUL "0.07" "0.13" "0.13" "0.07"
## FRS "0.04" "0.10" "0.11" "0.05"
## MAL "0.03" "0.09" "0.10" "0.04"
## ROS "0.07" "0.13" "0.14" "0.08"
## GRA "0.07" "0.11" "0.11" "0.06"
## STS "0.04" "0.11" "0.11" "0.05"
## BAR "0.09" "0.14" "0.14" "0.04"
## SPM "0.05" "0.12" "0.13" "0.07"
## TUA "0.06" "0.12" "0.12" "0.05"
## GRC "0.07" "0.12" "0.12" "0.07"
## SPC "0.05" "0.13" "0.13" "0.08"
## SPB "0.05" "0.12" "0.12" "0.08"
## SPS "0.09" "0.15" "0.16" "0.09"
## POP NA "0.10" "0.10" "0.05"
## SOC "3906" NA "0.01" "0.10"
## GES "4010" "109" NA "0.11"
## TUH "4104" "283" "194" NA
Make a table and save it as a word document
# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)
# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]
merged_df1 <- as.data.frame(merged_df)
write.csv(merged_df1, "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/merged_df_MAF01_overlap.csv")
# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df))
ft
Population | CRO | ITP | ITR | ALD | ITB | SLO | SER | BUL | FRS | MAL | ROS | GRA | STS | BAR | SPM | TUA | GRC | SPC | SPB | SPS | POP | SOC | GES | TUH |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRO | 0.05 | 0.05 | 0.05 | 0.09 | 0.06 | 0.17 | 0.09 | 0.06 | 0.06 | 0.10 | 0.06 | 0.06 | 0.09 | 0.09 | 0.07 | 0.05 | 0.10 | 0.09 | 0.11 | 0.06 | 0.11 | 0.12 | 0.07 | |
ITP | 201 | 0.02 | 0.08 | 0.07 | 0.04 | 0.17 | 0.06 | 0.04 | 0.02 | 0.08 | 0.05 | 0.04 | 0.08 | 0.06 | 0.06 | 0.06 | 0.08 | 0.07 | 0.09 | 0.04 | 0.10 | 0.11 | 0.04 | |
ITR | 477 | 371 | 0.08 | 0.07 | 0.04 | 0.16 | 0.05 | 0.04 | 0.02 | 0.08 | 0.05 | 0.04 | 0.07 | 0.07 | 0.04 | 0.06 | 0.09 | 0.08 | 0.07 | 0.04 | 0.10 | 0.10 | 0.04 | |
ALD | 180 | 224 | 585 | 0.12 | 0.08 | 0.22 | 0.11 | 0.09 | 0.08 | 0.12 | 0.08 | 0.10 | 0.12 | 0.11 | 0.10 | 0.07 | 0.13 | 0.12 | 0.14 | 0.09 | 0.14 | 0.14 | 0.09 | |
ITB | 591 | 583 | 302 | 752 | 0.06 | 0.22 | 0.10 | 0.06 | 0.06 | 0.10 | 0.10 | 0.07 | 0.13 | 0.09 | 0.10 | 0.10 | 0.09 | 0.08 | 0.13 | 0.05 | 0.13 | 0.14 | 0.08 | |
SLO | 509 | 585 | 455 | 689 | 245 | 0.15 | 0.07 | 0.04 | 0.04 | 0.06 | 0.05 | 0.04 | 0.08 | 0.05 | 0.06 | 0.06 | 0.06 | 0.05 | 0.09 | 0.04 | 0.09 | 0.10 | 0.04 | |
SER | 322 | 520 | 697 | 442 | 672 | 476 | 0.20 | 0.16 | 0.16 | 0.20 | 0.19 | 0.18 | 0.22 | 0.21 | 0.20 | 0.19 | 0.22 | 0.20 | 0.24 | 0.17 | 0.23 | 0.23 | 0.17 | |
BUL | 428 | 604 | 899 | 414 | 950 | 782 | 316 | 0.07 | 0.06 | 0.10 | 0.09 | 0.08 | 0.11 | 0.10 | 0.08 | 0.09 | 0.10 | 0.10 | 0.12 | 0.07 | 0.13 | 0.13 | 0.07 | |
FRS | 1038 | 1004 | 654 | 1192 | 448 | 629 | 1100 | 1393 | 0.04 | 0.07 | 0.07 | 0.03 | 0.09 | 0.06 | 0.07 | 0.07 | 0.07 | 0.06 | 0.10 | 0.04 | 0.10 | 0.11 | 0.05 | |
MAL | 817 | 621 | 694 | 746 | 996 | 1118 | 1139 | 1157 | 1270 | 0.07 | 0.05 | 0.04 | 0.08 | 0.06 | 0.05 | 0.06 | 0.07 | 0.07 | 0.07 | 0.03 | 0.09 | 0.10 | 0.04 | |
ROS | 683 | 884 | 1047 | 771 | 961 | 726 | 367 | 445 | 1341 | 1498 | 0.10 | 0.08 | 0.12 | 0.09 | 0.10 | 0.10 | 0.10 | 0.09 | 0.13 | 0.07 | 0.13 | 0.14 | 0.08 | |
GRA | 711 | 705 | 1070 | 534 | 1279 | 1220 | 884 | 656 | 1709 | 873 | 1100 | 0.07 | 0.09 | 0.09 | 0.07 | 0.02 | 0.10 | 0.09 | 0.10 | 0.07 | 0.11 | 0.11 | 0.06 | |
STS | 1053 | 1098 | 835 | 1230 | 536 | 546 | 989 | 1304 | 412 | 1523 | 1125 | 1764 | 0.09 | 0.07 | 0.07 | 0.07 | 0.07 | 0.06 | 0.10 | 0.04 | 0.11 | 0.11 | 0.05 | |
BAR | 1333 | 1227 | 862 | 1446 | 824 | 1059 | 1489 | 1742 | 512 | 1233 | 1783 | 1900 | 916 | 0.11 | 0.06 | 0.10 | 0.13 | 0.12 | 0.11 | 0.09 | 0.14 | 0.14 | 0.04 | |
SPM | 1360 | 1227 | 883 | 1450 | 916 | 1160 | 1555 | 1782 | 684 | 1128 | 1873 | 1864 | 1096 | 211 | 0.09 | 0.10 | 0.03 | 0.01 | 0.08 | 0.05 | 0.12 | 0.13 | 0.07 | |
TUA | 851 | 901 | 1271 | 694 | 1441 | 1340 | 932 | 641 | 1885 | 1148 | 1057 | 277 | 1886 | 2123 | 2103 | 0.08 | 0.11 | 0.10 | 0.03 | 0.06 | 0.12 | 0.12 | 0.05 | |
GRC | 934 | 884 | 1224 | 754 | 1467 | 1440 | 1141 | 925 | 1877 | 862 | 1370 | 270 | 1975 | 2007 | 1944 | 445 | 0.11 | 0.09 | 0.11 | 0.07 | 0.12 | 0.12 | 0.07 | |
SPC | 1601 | 1474 | 1124 | 1698 | 1125 | 1363 | 1781 | 2020 | 818 | 1368 | 2086 | 2116 | 1214 | 310 | 252 | 2354 | 2193 | 0.01 | 0.14 | 0.05 | 0.13 | 0.13 | 0.08 | |
SPB | 2158 | 2040 | 1683 | 2263 | 1644 | 1869 | 2313 | 2569 | 1264 | 1923 | 2594 | 2687 | 1601 | 827 | 823 | 2925 | 2760 | 571 | 0.13 | 0.05 | 0.12 | 0.12 | 0.08 | |
SPS | 2144 | 2002 | 1669 | 2226 | 1690 | 1930 | 2341 | 2568 | 1371 | 1785 | 2651 | 2601 | 1750 | 874 | 787 | 2855 | 2643 | 567 | 332 | 0.09 | 0.15 | 0.16 | 0.09 | |
POP | 2197 | 2103 | 1734 | 2320 | 1645 | 1852 | 2316 | 2591 | 1225 | 2067 | 2566 | 2777 | 1509 | 878 | 939 | 3001 | 2876 | 702 | 283 | 615 | 0.10 | 0.10 | 0.05 | |
SOC | 1748 | 1898 | 2224 | 1678 | 2262 | 2060 | 1591 | 1327 | 2689 | 2316 | 1388 | 1470 | 2512 | 3068 | 3107 | 1198 | 1618 | 3346 | 3895 | 3892 | 3906 | 0.01 | 0.10 | |
GES | 1845 | 1991 | 2321 | 1770 | 2365 | 2166 | 1695 | 1426 | 2794 | 2395 | 1496 | 1540 | 2620 | 3168 | 3204 | 1265 | 1675 | 3445 | 3995 | 3988 | 4010 | 109 | 0.11 | |
TUH | 1919 | 2051 | 2395 | 1828 | 2461 | 2272 | 1797 | 1512 | 2897 | 2412 | 1629 | 1544 | 2745 | 3251 | 3275 | 1267 | 1650 | 3519 | 4076 | 4054 | 4104 | 283 | 194 |
cd /gpfs/gibbs/pi/caccone/mkc54/albo/europe
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/overlap/r2_0.01_b \
--keep-fam output/fst/pops_4bfst_overlap2.txt \
--make-bed \
--out output/fst/overlap/mantel_overlap \
--silent;
grep 'samples\|variants\|remaining' output/fst/overlap/mantel_overlap.log
20968 variants loaded from .bim file. –keep-fam: 242 people remaining. Total genotyping rate in remaining samples is 0.973943. 20968 variants and 242 people pass filters and QC.
Then convert to raw format
plink \
--allow-extra-chr \
--keep-allele-order \
--bfile output/fst/overlap/mantel_overlap \
--recodeA \
--out output/fst/overlap/mantel_overlap \
--silent;
grep 'samples\|variants\|remaining' output/fst/overlap/mantel_overlap.log
20968 variants loaded from .bim file. 20968 variants and 242 people pass filters and QC.
Import the data and covert it to genind format
# import the data
albo <-
read.PLINK(
here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/mantel_overlap.raw"),
quiet = FALSE,
chunkSize = 1000,
parallel = require("parallel"),
n.cores = 4
)
# convert to genind
albo2 <- gl2gi(albo, probar = TRUE, verbose = NULL)
Save
saveRDS(albo2, here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/albo2_overlap_MAF01.rds"
))
Load it
albo2 <- readRDS(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/albo2_overlap_MAF01.rds"
))
Import Sample locations
sampling_loc <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/sampling_loc_overlap.rds"))
head(sampling_loc)
## # A tibble: 6 × 10
## Pop_City Country Latitude Longitude Continent Abbreviation Year Region Marker
## <chr> <chr> <dbl> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Saint-M… France 45.2 5.77 Europe FRS 2019 Weste… SNPs
## 2 Strasbo… France 48.6 7.75 Europe STS 2019 Weste… SNPs
## 3 Penafiel Portug… 41.2 -8.33 Europe POP 2017 South… SNPs
## 4 Badajoz Spain 38.9 -6.97 Europe SPB 2018 South… SNPs
## 5 San Roq… Spain 36.2 -5.37 Europe SPS 2017 South… SNPs
## 6 Catarro… Spain 39.4 -0.396 Europe SPC 2017 South… SNPs
## # ℹ 1 more variable: order <dbl>
The fam file
fam_file <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/mantel_overlap.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
# # Replace the BEN a with BEN b (remember to never name samples with the same ID... I change it manually in the fam file.)
# fam_data <- fam_data %>%
# mutate(IndividualID = ifelse(FamilyID == "BEN" & IndividualID == "a", "b", IndividualID))
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 SOC 1065 0 0 0 -9
## 2 SOC 1066 0 0 0 -9
## 3 SOC 1067 0 0 0 -9
## 4 SOC 1068 0 0 0 -9
## 5 SOC 1069 0 0 0 -9
## 6 SOC 1070 0 0 0 -9
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype Pop_City Country
## 1 SOC 1065 0 0 0 -9 Sochi Russia
## 2 SOC 1066 0 0 0 -9 Sochi Russia
## 3 SOC 1067 0 0 0 -9 Sochi Russia
## 4 SOC 1068 0 0 0 -9 Sochi Russia
## 5 SOC 1069 0 0 0 -9 Sochi Russia
## 6 SOC 1070 0 0 0 -9 Sochi Russia
## Latitude Longitude Continent Year Region Marker order
## 1 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 2 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 3 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 4 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 5 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
## 6 43.60042 39.74533 Europe 2021 Eastern Europe SNPs 46
Get the latitude and longitude
## [,1] [,2]
## [1,] 43.60042 39.74533
## [2,] 43.60042 39.74533
## [3,] 43.60042 39.74533
## [4,] 43.60042 39.74533
## [5,] 43.60042 39.74533
## [6,] 43.60042 39.74533
Add jitter
## x y
## [1,] 43.60075 39.75428
## [2,] 43.60325 39.75480
## [3,] 43.59532 39.75272
## [4,] 43.59759 39.74487
## [5,] 43.60104 39.73846
## [6,] 43.59365 39.75122
Add to object
Save
saveRDS(
albo2,
here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/albo2_SNP_overlap_MAF1.rds"
)
)
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(albo2@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(albo2@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
albo2_subset <- albo2[selected_individuals, ]
Mantel test
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(albo2_subset$other$xy)
ibd <- mantel.randtest(Dgen,Dgeo)
ibd
## Monte-Carlo test
## Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
##
## Observation: -0.09970885
##
## Based on 999 replicates
## Simulated p-value: 0.754
## Alternative hypothesis: greater
##
## Std.Obs Expectation Variance
## -0.749669092 -0.003145698 0.016591424
Monte-Carlo test Call: mantel.randtest(m1 = Dgen, m2 = Dgeo)
Observation: -0.09965469
Based on 999 replicates Simulated p-value: 0.753 Alternative hypothesis: greater
Std.Obs Expectation Variance
-0.76056551 -0.00085997 0.01687307
Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/simIBD_SNPs_MAF01_overlap.pdf"))
plot(ibd)
dev.off()
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/Genetic_v_Geog_distance_SNPs_MAF01_overlap.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Save Plot without equation
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/Genetic_v_Geog_distance_SNPs_MAF01_overlap_noequ.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Use library MASS for plot
library(MASS)
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
#myPal <-
# colorRampPalette(c("white", "blue", "gold", "orange", "red"))
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/IDB_PlotFromMASS_density_SNPs_MAF01_overlap_purple.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
#myPal <-
# colorRampPalette(c("white", "blue", "gold", "orange", "red"))
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()
Save plot with equation
png(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/IDB_PlotFromMASS_density_SNPs_MAF01_overlap_equation.png"),
width = 5,
height = 4,
units='in',
res = 300)
myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.4fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()