Load libraries

library(hierfstat)
library(here)
## here() starts at /gpfs/gibbs/pi/caccone/mkc54/albo/scripts/RMarkdowns
library(adegenet)
## Loading required package: ade4
## 
##    /// adegenet 2.1.10 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()
## 
## Attaching package: 'adegenet'
## The following objects are masked from 'package:hierfstat':
## 
##     Hs, read.fstat
library(pegas)
## Loading required package: ape
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:hierfstat':
## 
##     pcoa, varcomp
## Registered S3 method overwritten by 'pegas':
##   method      from
##   print.amova ade4
## 
## Attaching package: 'pegas'
## The following object is masked from 'package:ape':
## 
##     mst
## The following object is masked from 'package:ade4':
## 
##     amova

Microsatellite pops overlapping with SNP dataset with at least 4 individuals

1. Load .gen file with microsat data in 24 overlapping pops

fst_pops <- read.genepop("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen", ncode=3L)
## 
##  Converting data from a Genepop .gen file to a genind object... 
## 
## 
## File description:  ARBOMONITOR_Aedes_albopictus                                           
## 
## ...done.
fst_pops
## /// GENIND OBJECT /////////
## 
##  // 637 individuals; 11 loci; 163 alleles; size: 486.7 Kb
## 
##  // Basic content
##    @tab:  637 x 163 matrix of allele counts
##    @loc.n.all: number of alleles per locus (range: 6-22)
##    @loc.fac: locus factor for the 163 columns of @tab
##    @all.names: list of allele names for each locus
##    @ploidy: ploidy of each individual  (range: 2-2)
##    @type:  codom
##    @call: read.genepop(file = "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen", 
##     ncode = 3L)
## 
##  // Optional content
##    @pop: population of each individual (group size range: 5-60)
pop(fst_pops)
##   [1] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##   [7] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [13] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [19] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [25] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [31] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [37] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [43] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [49] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [55] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [61] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11
##  [67] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 BUL-BULO34
##  [73] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [79] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [85] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [91] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [97] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 ROS-ROSM30 ROS-ROSM30
## [103] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [109] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [115] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [121] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [127] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 SOC-RUSO10 SOC-RUSO10
## [133] SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10
## [139] SOC-RUSO10 SOC-RUSO10 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [145] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [151] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [157] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [163] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [169] SER-SRNO30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [175] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [181] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [187] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [193] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [199] TUA-TRLG30 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [205] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [211] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [217] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [223] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [229] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [235] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [241] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [247] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [253] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 CRO-CRPL30
## [259] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [265] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [271] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [277] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [283] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 GRC-GRCC25
## [289] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [295] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [301] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [307] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [313] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRA-GRAN08 GRA-GRAN08
## [319] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [325] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [331] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [337] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [343] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 ITP-ITVL09 ITP-ITVL09
## [349] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [355] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [361] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [367] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [373] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITR-ITRO17 ITR-ITRO17
## [379] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [385] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [391] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [397] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [403] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITB-ITBO15 ITB-ITBO15
## [409] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [415] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [421] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [427] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [433] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 MAL-MTLU39 MAL-MTLU39
## [439] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [445] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [451] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [457] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [463] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [469] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [475] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [481] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [487] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [493] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [499] SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [505] SPS-ESUP10 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [511] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [517] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [523] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [529] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [535] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [541] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 FRS-FRMH45 FRS-FRMH45
## [547] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [553] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [559] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [565] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [571] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 STS-FRST30 STS-FRST30
## [577] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [583] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [589] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [595] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [601] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 GES-ABSU05 GES-ABSU05
## [607] GES-ABSU05 GES-ABSU05 GES-ABSU05 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [613] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [619] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [625] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [631] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [637] BAR-ESBA28
## 24 Levels: POP-PTPN60 SPB-ESBD11 BUL-BULO34 ROS-ROSM30 ... BAR-ESBA28
Ds<-genet.dist(fst_pops[,-2],method="Ds") # Nei's standard genetic distances
Ds_1 <- as.matrix(Ds)
Ds_2 <- as.data.frame(Ds_1)
write.csv(Ds_2, file="/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/Nei_stats_fst_pops.csv")

WC84

WC84 <- genet.dist(fst_pops, method = "WC84")
WC84_1 <- as.matrix(WC84)
WC84_2 <- as.data.frame(WC84_1)
write.csv(WC84_2, file="/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/WC84_stats_fst_pops_original.csv")
sampling_loc <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.csv")
saveRDS(sampling_loc, here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.rds"))

Import sample locations

#sampling_loc <- readRDS("sampling_loc_euro_microsats.rds")
sampling_loc <- readRDS("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.rds")

# Arrange by region 
sampling_loc <- sampling_loc |>
  dplyr::arrange(
    order_microsat
  )

# Check it
head(sampling_loc)
##               Pop_City  Country Latitude Longitude Abbreviation          Region
## 1 Saint-Martin-d'Heres   France 45.16531  5.771806          FRS  Western Europe
## 2           Strasbourg   France 48.61124  7.754512          STS  Western Europe
## 3             Penafiel Portugal 41.18555 -8.329371          POP Southern Europe
## 4              Badajoz    Spain 38.86622 -6.974194          SPB Southern Europe
## 5            San Roque    Spain 36.17042 -5.371530          SPS Southern Europe
## 6            Catarroja    Spain 39.40294 -0.395514          SPC Southern Europe
##   order_microsat
## 1              1
## 2              2
## 3              3
## 4              6
## 5             10
## 6             33

1.1 Now lets plot the Fst data with ggplot

library(tidyverse)
all_fst_matrix <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/WC84_stats_fst_pops.csv", header=TRUE)
all_fst_matrix <- column_to_rownames(all_fst_matrix, var="X")
all_fst_matrix <- as.matrix(all_fst_matrix)

all_fst_matrix[upper.tri(all_fst_matrix)] <- t(all_fst_matrix)[upper.tri(t(all_fst_matrix))]
head(all_fst_matrix)
##            POP        SPB        BUL       ROS       SOC        SER        TUA
## POP 0.00000000 0.09862327 0.10475031 0.1184234 0.1638067 0.14477361 0.09545614
## SPB 0.09862327 0.00000000 0.11080396 0.1016153 0.1628891 0.12372018 0.12771807
## BUL 0.10475031 0.11080396 0.00000000 0.1324089 0.1359418 0.06656334 0.14296227
## ROS 0.11842345 0.10161533 0.13240895 0.0000000 0.1705913 0.13843205 0.10154021
## SOC 0.16380668 0.16288910 0.13594183 0.1705913 0.0000000 0.16237093 0.13764053
## SER 0.14477361 0.12372018 0.06656334 0.1384320 0.1623709 0.00000000 0.14496649
##            TUH        ALD        CRO        GRC        GRA       ITP        ITR
## POP 0.07122529 0.14835966 0.13989804 0.12628871 0.11239462 0.1225115 0.09741293
## SPB 0.11310079 0.13825518 0.12622455 0.12457941 0.08145219 0.1212394 0.06879304
## BUL 0.12891409 0.07227269 0.08677972 0.07301433 0.08114090 0.1267097 0.05136298
## ROS 0.08902702 0.12651578 0.13480471 0.13729104 0.13671367 0.1222302 0.09727574
## SOC 0.14855716 0.11023240 0.10621109 0.15220631 0.11982206 0.1387863 0.07051625
## SER 0.13990064 0.08496288 0.10286947 0.09753203 0.11969399 0.1374689 0.08499667
##            ITB        MAL        SLO        SPS        SPC        SPM       FRS
## POP 0.09180241 0.09223906 0.08429084 0.11753684 0.11391168 0.10270469 0.1280323
## SPB 0.07812187 0.06400219 0.07761426 0.06371956 0.07106287 0.09913016 0.1410927
## BUL 0.04183915 0.09172075 0.03155298 0.11023396 0.14331553 0.13185030 0.1548117
## ROS 0.11880947 0.11433652 0.09505474 0.16599584 0.12032735 0.10467044 0.1551246
## SOC 0.11329893 0.06737795 0.09631422 0.19991036 0.16950400 0.10919955 0.1457662
## SER 0.06043377 0.11921096 0.05199941 0.16260205 0.15912775 0.16573689 0.1696733
##            STS         GES        BAR
## POP 0.09155837  0.11811409 0.06470586
## SPB 0.08257750  0.15145547 0.02950121
## BUL 0.06398690  0.10369875 0.08961048
## ROS 0.10169035  0.17586549 0.07965325
## SOC 0.10336390 -0.01769868 0.14452586
## SER 0.07062897  0.15416050 0.11858534
isSymmetric(all_fst_matrix)
## [1] TRUE
poporder <- populations_in_order <- colnames(all_fst_matrix)
poporder
##  [1] "POP"  "POL"  "SPB"  "ESAC" "ESMO" "ESSV" "ESAL" "SPS"  "ESBM" "ESTM"
## [11] "ESBN" "ESMC" "ESAB" "ESBY" "ESNI" "ESNU" "ESAG" "ESAZ" "ESCN" "ESCT"
## [21] "ESMU" "ESPP" "ESLM" "ESCA" "ESAY" "SPC"  "ESBS" "ESBE" "ESTS" "ESBA"
## [31] "ESCP" "SPM"  "FRS"  "STS"  "ITB"  "ITRO" "SLO"  "MAL"  "ITP"  "CRO" 
## [41] "ALD"  "SER"  "ROTI" "RODE" "ROS"  "GRTA" "BUL"  "ROPL" "GRPA" "GRA" 
## [51] "GRC"  "GRKV" "ROBU" "TUA"  "TRGN" "ROCO" "TRIS" "SOC"  "TRTR" "RUPL"
## [61] "RUBE" "ABAL" "TRRI" "ABSU" "TUH"  "GEPO" "TRAR"

Add NA on the upper left side of the matrix.

all_fst_matrix[lower.tri(all_fst_matrix)] <- NA
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
pairfst.long <- melt(all_fst_matrix)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
summary(pairfst.long)
##        X1             X2           value       
##  POP    :  67   POP    :  67   Min.   :0.0000  
##  POL    :  67   POL    :  67   1st Qu.:0.0998  
##  SPB    :  67   SPB    :  67   Median :0.1448  
##  ESAC   :  67   ESAC   :  67   Mean   :0.1567  
##  ESMO   :  67   ESMO   :  67   3rd Qu.:0.2013  
##  ESSV   :  67   ESSV   :  67   Max.   :0.5237  
##  (Other):4087   (Other):4087   NA's   :2211

Now we have to convert the matrix to a data frame to plot it with ggplot.

pairfst.f <- ggplot(pairfst.long, aes(X1, X2)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_gradient(
    low = "white",
    high = "#71b6ff",
    name = "Fst",
    na.value = "white",
    limits = c(0, 0.5)
  ) +
  scale_x_discrete(position = "top") +
  theme_bw() +
  geom_text(aes(label = ifelse(
    is.na(value), "", formatC(value, digits = 2, format = "f")
  )), size = 4) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.y = element_text(hjust = 0)
  )
pairfst.f

Save it

ggsave(
  filename = "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_matrix_microsats_overlap_fst_pops.pdf",
  pairfst.f,
  width = 14,
  height = 16,
  units = "in"
)

1.2 Fst by country

all_fst_df <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/WC84_stats_fst_pops.csv", header=TRUE)
all_fst_df <- column_to_rownames(all_fst_df, var="X")
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
abbreviation_to_order <- sampling_loc %>% dplyr::select(Abbreviation, order_microsat)
# Step 2: Calculate mean Fst for each pair of countries

# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(all_fst_df))
fst_df$Abbreviation1 <- rownames(fst_df)

# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)

# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))

# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>% 
  group_by(Country_1, Country_2) %>% 
  summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>% 
  filter(Country_1 != Country_2)


#save the fst values as csv
#write.csv(fst_summary, "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_summary_by_country_overlap.csv")

fst_summary_ordered <- here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_summary_ordered_overlap.txt")
#fst_summary_ordered <- data.frame(fst_summary_ordered)
fst_summary_ordered <- read.table(fst_summary_ordered, header=TRUE)


# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary_ordered, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)

# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)

for(i in 1:nrow(fst_matrix_summary)) {
  for(j in i:nrow(fst_matrix_summary)) {
    if (i == j) {
      symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
    } else {
      avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
      symmetric_fst_matrix[i, j] <- avg_value
      symmetric_fst_matrix[j, i] <- avg_value
    }
  }
}

# Check if the matrix is symmetric
# print(isSymmetric(symmetric_fst_matrix))

# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
##             Albania   Bulgaria    Croatia     France     Georgia     Greece
## Albania          NA 0.07227269 0.04057776 0.08948333  0.11554526 0.07145213
## Bulgaria 0.07227269         NA 0.08677972 0.10939929  0.10369875 0.07707761
## Croatia  0.04057776 0.08677972         NA 0.11680090  0.11847108 0.09048210
## France   0.08948333 0.10939929 0.11680090         NA  0.12628339 0.09849714
## Georgia  0.11554526 0.10369875 0.11847108 0.12628339          NA 0.10644565
## Greece   0.07145213 0.07707761 0.09048210 0.09849714  0.10644565         NA
## Italy    0.07959606 0.07330393 0.08446501 0.07190402  0.09876712 0.08135587
## Malta    0.08442652 0.09172075 0.09097150 0.05900789  0.07969581 0.08991645
## Portugal 0.14835966 0.10475031 0.13989804 0.10979533  0.11811409 0.11934166
## Romania  0.12651578 0.13240895 0.13480471 0.12840747  0.17586549 0.13700235
## Russia   0.11023240 0.13594183 0.10621109 0.12456507 -0.01769868 0.13601419
## Serbia   0.08496288 0.06656334 0.10286947 0.12015114  0.15416050 0.10861301
## Slovenia 0.05805019 0.03155298 0.08929249 0.06578723  0.08383251 0.06200544
## Spain    0.13352057 0.11716285 0.14045223 0.10985579  0.13839704 0.11945354
## Turkey   0.12247859 0.13593818 0.13067456 0.06216445  0.13858295 0.13092924
##               Italy      Malta   Portugal    Romania      Russia     Serbia
## Albania  0.07959606 0.08442652 0.14835966 0.12651578  0.11023240 0.08496288
## Bulgaria 0.07330393 0.09172075 0.10475031 0.13240895  0.13594183 0.06656334
## Croatia  0.08446501 0.09097150 0.13989804 0.13480471  0.10621109 0.10286947
## France   0.07190402 0.05900789 0.10979533 0.12840747  0.12456507 0.12015114
## Georgia  0.09876712 0.07969581 0.11811409 0.17586549 -0.01769868 0.15416050
## Greece   0.08135587 0.08991645 0.11934166 0.13700235  0.13601419 0.10861301
## Italy            NA 0.06182629 0.10390895 0.11277179  0.10753383 0.09429978
## Malta    0.06182629         NA 0.09223906 0.11433652  0.06737795 0.11921096
## Portugal 0.10390895 0.09223906         NA 0.11842345  0.16380668 0.14477361
## Romania  0.11277179 0.11433652 0.11842345         NA  0.17059132 0.13843205
## Russia   0.10753383 0.06737795 0.16380668 0.17059132          NA 0.16237093
## Serbia   0.09429978 0.11921096 0.14477361 0.13843205  0.16237093         NA
## Slovenia 0.05398682 0.04654500 0.08429084 0.09505474  0.09631422 0.05199941
## Spain    0.10144315 0.08366327 0.09949647 0.11445244  0.15720577 0.14595444
## Turkey   0.09068222 0.07784607 0.08334071 0.09528362  0.14309884 0.14243357
##            Slovenia      Spain     Turkey
## Albania  0.05805019 0.13352057 0.12247859
## Bulgaria 0.03155298 0.11716285 0.13593818
## Croatia  0.08929249 0.14045223 0.13067456
## France   0.06578723 0.10985579 0.06216445
## Georgia  0.08383251 0.13839704 0.13858295
## Greece   0.06200544 0.11945354 0.13092924
## Italy    0.05398682 0.10144315 0.09068222
## Malta    0.04654500 0.08366327 0.07784607
## Portugal 0.08429084 0.09949647 0.08334071
## Romania  0.09505474 0.11445244 0.09528362
## Russia   0.09631422 0.15720577 0.14309884
## Serbia   0.05199941 0.14595444 0.14243357
## Slovenia         NA 0.08402922 0.08021932
## Spain    0.08402922         NA 0.11995260
## Turkey   0.08021932 0.11995260         NA

1.2.1 Reorder

# Read the file
country_order <- here(
  "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/country_order.txt")
country_order <- read.table(country_order, 
                       header = FALSE,
                       col.names = c("country"))

order_countries <- as.vector(country_order$country)
order_countries
##  [1] "France"   "Portugal" "Spain"    "Italy"    "Malta"    "Slovenia"
##  [7] "Croatia"  "Albania"  "Greece"   "Serbia"   "Romania"  "Bulgaria"
## [13] "Turkey"   "Russia"   "Georgia"
#write.csv(symmetric_fst_matrix, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/symmetric_fst_matrix_overlap.csv"))

symmetric_fst_matrix_ordered <- read_csv(
 "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/symmetric_fst_matrix_overlap_ordered.csv")
## New names:
## Rows: 15 Columns: 16
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (15): France, Portugal, Spain, Italy, Malta, Slovenia, Croatia,
## Albania,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
symmetric_fst_matrix_ordered<- as.data.frame(symmetric_fst_matrix_ordered) #convert to dataframe
rownames(symmetric_fst_matrix_ordered) <- symmetric_fst_matrix_ordered$...1 #use first column as rownames
symmetric_fst_matrix_ordered <- symmetric_fst_matrix_ordered[ -c(1) ] #remove 1st column
symmetric_fst_matrix_ordered <-as.matrix(symmetric_fst_matrix_ordered) #convert to matrix
#symmetric_fst_matrix_ordered <- symmetric_fst_matrix_ordered[poporder, poporder]
symmetric_fst_matrix_ordered[lower.tri(symmetric_fst_matrix_ordered)] <- NA
print(symmetric_fst_matrix_ordered)
##          France  Portugal     Spain      Italy      Malta   Slovenia    Croatia
## France       NA 0.1713433 0.1480517 0.07190402 0.05900789 0.06578723 0.11680090
## Portugal     NA        NA 0.2502794 0.18373532 0.15233714 0.16752995 0.19866296
## Spain        NA        NA        NA 0.14122821 0.11810125 0.12005804 0.16661100
## Italy        NA        NA        NA         NA 0.06182629 0.05398682 0.08446501
## Malta        NA        NA        NA         NA         NA 0.04654500 0.09097150
## Slovenia     NA        NA        NA         NA         NA         NA 0.08929249
## Croatia      NA        NA        NA         NA         NA         NA         NA
## Albania      NA        NA        NA         NA         NA         NA         NA
## Greece       NA        NA        NA         NA         NA         NA         NA
## Serbia       NA        NA        NA         NA         NA         NA         NA
## Romania      NA        NA        NA         NA         NA         NA         NA
## Bulgaria     NA        NA        NA         NA         NA         NA         NA
## Turkey       NA        NA        NA         NA         NA         NA         NA
## Russia       NA        NA        NA         NA         NA         NA         NA
## Georgia      NA        NA        NA         NA         NA         NA         NA
##             Albania     Greece     Serbia    Romania   Bulgaria     Turkey
## France   0.08948333 0.11071192 0.12015114 0.09519542 0.10939929 0.07589234
## Portugal 0.22023894 0.19774973 0.23593387 0.16308682 0.20139278 0.17242399
## Spain    0.16293541 0.16109165 0.19369906 0.14816269 0.15849566 0.15743687
## Italy    0.07959606 0.08330830 0.09429978 0.08201868 0.07330393 0.08942815
## Malta    0.08442652 0.09156903 0.11921096 0.07089738 0.09172075 0.07583201
## Slovenia 0.05805019 0.06468557 0.05199941 0.06463287 0.03155298 0.06273769
## Croatia  0.04057776 0.07447962 0.10286947 0.12454840 0.08677972 0.12830928
## Albania          NA 0.07039105 0.08496288 0.11371843 0.07227269 0.11864242
## Greece           NA         NA 0.11368631 0.09991009 0.07559310 0.11606726
## Serbia           NA         NA         NA 0.13128964 0.06656334 0.12686711
## Romania          NA         NA         NA         NA 0.09300663 0.09196546
## Bulgaria         NA         NA         NA         NA         NA 0.10974546
## Turkey           NA         NA         NA         NA         NA         NA
## Russia           NA         NA         NA         NA         NA         NA
## Georgia          NA         NA         NA         NA         NA         NA
##              Russia    Georgia
## France   0.12619748 0.12058407
## Portugal 0.22765544 0.23105780
## Spain    0.19406219 0.21018824
## Italy    0.12093466 0.12083105
## Malta    0.09170185 0.09195896
## Slovenia 0.11508794 0.09905471
## Croatia  0.13999132 0.15550494
## Albania  0.14151206 0.13212053
## Greece   0.13608300 0.13317193
## Serbia   0.18199287 0.18202927
## Romania  0.14682007 0.13190772
## Bulgaria 0.15604941 0.14253634
## Turkey   0.14398829 0.13028856
## Russia           NA 0.08060513
## Georgia          NA         NA

Now we have to convert the matrix to a data frame to plot it with ggplot.

pairfst.long2 <- melt(symmetric_fst_matrix_ordered)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
summary(pairfst.long2)
##         X1             X2          value        
##  France  : 15   France  : 15   Min.   :0.03155  
##  Portugal: 15   Portugal: 15   1st Qu.:0.08443  
##  Spain   : 15   Spain   : 15   Median :0.11680  
##  Italy   : 15   Italy   : 15   Mean   :0.12026  
##  Malta   : 15   Malta   : 15   3rd Qu.:0.14816  
##  Slovenia: 15   Slovenia: 15   Max.   :0.25028  
##  (Other) :135   (Other) :135   NA's   :120
pairfst.f2 <- ggplot(pairfst.long2, aes(X1, X2)) +
  geom_tile(aes(fill = value), colour = "white") +
  scale_fill_gradient(
    low = "white",
    high = "#71b6ff",
    name = "Fst",
    na.value = "white",
    limits = c(0, 0.5)
  ) +
  scale_x_discrete(position = "top") +
  theme_bw() +
  geom_text(aes(label = ifelse(
    is.na(value), "", formatC(value, digits = 2, format = "f")
  )), size = 5) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0),
    axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.text.y = element_text(hjust = 1)
  )
pairfst.f2

ggsave(
  filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_matrix_microsats_by_country_overlap.pdf"),
  pairfst.f2, 
  width = 10,
  height = 9,
  units = "in"
)

Remove NAs and rename columns

# remove NAs
fst2 <-
  pairfst.long |>
  drop_na()

# rename columns
fst2 <-
  fst2 |>
  dplyr::rename(pop1 = 1,
         pop2 = 2,
         fst  = 3)


# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
  dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
  dplyr::select(pop = pop2, fst)

# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)

# Calculate the mean fst for each population
mean_fst <- df_combined |>
  group_by(pop) |>
  summarise(mean_fst = mean(fst))

print(mean_fst)
## # A tibble: 67 × 2
##    pop   mean_fst
##    <fct>    <dbl>
##  1 POP      0.127
##  2 POL      0.294
##  3 SPB      0.137
##  4 ESAC     0.270
##  5 ESMO     0.283
##  6 ESSV     0.158
##  7 ESAL     0.216
##  8 SPS      0.157
##  9 ESBM     0.196
## 10 ESTM     0.123
## # ℹ 57 more rows

Merge

fst3 <-
  sampling_loc |>
  left_join(
    mean_fst,
    by = c("Abbreviation" = "pop")
  )

head(fst3)
##               Pop_City  Country Latitude Longitude Abbreviation          Region
## 1 Saint-Martin-d'Heres   France 45.16531  5.771806          FRS  Western Europe
## 2           Strasbourg   France 48.61124  7.754512          STS  Western Europe
## 3             Penafiel Portugal 41.18555 -8.329371          POP Southern Europe
## 4              Badajoz    Spain 38.86622 -6.974194          SPB Southern Europe
## 5            San Roque    Spain 36.17042 -5.371530          SPS Southern Europe
## 6            Catarroja    Spain 39.40294 -0.395514          SPC Southern Europe
##   order_microsat   mean_fst
## 1              1 0.13991299
## 2              2 0.09604862
## 3              3 0.12709704
## 4              6 0.13741891
## 5             10 0.15705816
## 6             33 0.15086062

Mean by region

# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
  group_by(Region) |>
  summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
  ungroup()  # Ungroup the data

# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
  left_join(region_means, by = "Region")

# Print the modified fst3 tibble
print(fst3)
##                  Pop_City  Country Latitude Longitude Abbreviation
## 1    Saint-Martin-d'Heres   France 45.16531  5.771806          FRS
## 2              Strasbourg   France 48.61124  7.754512          STS
## 3                Penafiel Portugal 41.18555 -8.329371          POP
## 4                 Badajoz    Spain 38.86622 -6.974194          SPB
## 5               San Roque    Spain 36.17042 -5.371530          SPS
## 6               Catarroja    Spain 39.40294 -0.395514          SPC
## 7                 Magaluf    Spain 39.50679  2.530729          SPM
## 8  Cornella del Llobregat    Spain 41.35351  2.096833          BAR
## 9                 Bologna    Italy 44.48478 11.366584          ITB
## 10                   Rome    Italy 41.90215 12.517399          ITR
## 11                 Puglia    Italy 41.12213 16.844107          ITP
## 12                   Luqa    Malta 35.86053 14.487028          MAL
## 13             Ajdovscina Slovenia 45.88715 13.770997          SLO
## 14              Dubrovnik  Croatia 42.60654 18.226612          CRO
## 15                 Durres  Albania 41.29704 19.503734          ALD
## 16                 Chania   Greece 35.51448 24.017960          GRC
## 17                 Athens   Greece 37.93719 23.946883          GRA
## 18               Novi Sad   Serbia 45.25887 19.818778          SER
## 19              Satu Mare  Romania 47.79147 22.890202          ROS
## 20                    Lom Bulgaria 43.80489 23.236340          BUL
## 21                 Aliaga   Turkey 38.76390 26.944800          TUA
## 22                   Hopa   Turkey 41.38760 41.437800          TUH
## 23                  Sochi   Russia 43.60042 39.745328          SOC
## 24      Sakhumi, Abkhazia  Georgia 43.07851 40.887588          GES
##             Region order_microsat   mean_fst mean_fst_by_region
## 1   Western Europe              1 0.13991299               0.12
## 2   Western Europe              2 0.09604862               0.12
## 3  Southern Europe              3 0.12709704               0.13
## 4  Southern Europe              6 0.13741891               0.13
## 5  Southern Europe             10 0.15705816               0.13
## 6  Southern Europe             33 0.15086062               0.13
## 7  Southern Europe             35 0.14201066               0.13
## 8  Southern Europe             42         NA               0.13
## 9  Southern Europe             51 0.11120606               0.13
## 10 Southern Europe             54         NA               0.13
## 11 Southern Europe             56 0.13237633               0.13
## 12 Southern Europe             57 0.09631054               0.13
## 13 Southern Europe             58 0.09265578               0.13
## 14 Southern Europe             59 0.13525714               0.13
## 15 Southern Europe             61 0.12900361               0.13
## 16 Southern Europe             63 0.14340327               0.13
## 17 Southern Europe             65 0.11737097               0.13
## 18  Eastern Europe             68 0.15541107               0.13
## 19  Eastern Europe             71 0.14552059               0.13
## 20  Eastern Europe             75 0.12566523               0.13
## 21  Eastern Europe             76 0.12285868               0.13
## 22  Eastern Europe             81 0.11307589               0.13
## 23  Eastern Europe             92 0.14701374               0.13
## 24  Eastern Europe             94         NA               0.13

Mean By country

# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
  group_by(Country) |>
  summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
  ungroup()  # Ungroup the data

# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
  left_join(country_means, by = "Country")

# Print the modified fst3 tibble
print(sampling_loc)
##                  Pop_City  Country Latitude Longitude Abbreviation
## 1    Saint-Martin-d'Heres   France 45.16531  5.771806          FRS
## 2              Strasbourg   France 48.61124  7.754512          STS
## 3                Penafiel Portugal 41.18555 -8.329371          POP
## 4                 Badajoz    Spain 38.86622 -6.974194          SPB
## 5               San Roque    Spain 36.17042 -5.371530          SPS
## 6               Catarroja    Spain 39.40294 -0.395514          SPC
## 7                 Magaluf    Spain 39.50679  2.530729          SPM
## 8  Cornella del Llobregat    Spain 41.35351  2.096833          BAR
## 9                 Bologna    Italy 44.48478 11.366584          ITB
## 10                   Rome    Italy 41.90215 12.517399          ITR
## 11                 Puglia    Italy 41.12213 16.844107          ITP
## 12                   Luqa    Malta 35.86053 14.487028          MAL
## 13             Ajdovscina Slovenia 45.88715 13.770997          SLO
## 14              Dubrovnik  Croatia 42.60654 18.226612          CRO
## 15                 Durres  Albania 41.29704 19.503734          ALD
## 16                 Chania   Greece 35.51448 24.017960          GRC
## 17                 Athens   Greece 37.93719 23.946883          GRA
## 18               Novi Sad   Serbia 45.25887 19.818778          SER
## 19              Satu Mare  Romania 47.79147 22.890202          ROS
## 20                    Lom Bulgaria 43.80489 23.236340          BUL
## 21                 Aliaga   Turkey 38.76390 26.944800          TUA
## 22                   Hopa   Turkey 41.38760 41.437800          TUH
## 23                  Sochi   Russia 43.60042 39.745328          SOC
## 24      Sakhumi, Abkhazia  Georgia 43.07851 40.887588          GES
##             Region order_microsat
## 1   Western Europe              1
## 2   Western Europe              2
## 3  Southern Europe              3
## 4  Southern Europe              6
## 5  Southern Europe             10
## 6  Southern Europe             33
## 7  Southern Europe             35
## 8  Southern Europe             42
## 9  Southern Europe             51
## 10 Southern Europe             54
## 11 Southern Europe             56
## 12 Southern Europe             57
## 13 Southern Europe             58
## 14 Southern Europe             59
## 15 Southern Europe             61
## 16 Southern Europe             63
## 17 Southern Europe             65
## 18  Eastern Europe             68
## 19  Eastern Europe             71
## 20  Eastern Europe             75
## 21  Eastern Europe             76
## 22  Eastern Europe             81
## 23  Eastern Europe             92
## 24  Eastern Europe             94
fst4 <- fst3 |>
  dplyr::select(
    Region, mean_fst_by_region, Country, mean_fst_by_country, Pop_City, Abbreviation, mean_fst,
  )

fst4 <- fst4 |>
  arrange(
    Region, Country, Pop_City
  )

# Round
fst4 <- fst4 |>
  mutate_if(is.numeric, ~ round(., 2))

head(fst4)
##           Region mean_fst_by_region  Country mean_fst_by_country
## 1 Eastern Europe               0.13 Bulgaria                0.13
## 2 Eastern Europe               0.13  Georgia                 NaN
## 3 Eastern Europe               0.13  Romania                0.15
## 4 Eastern Europe               0.13   Russia                0.15
## 5 Eastern Europe               0.13   Serbia                0.16
## 6 Eastern Europe               0.13   Turkey                0.12
##            Pop_City Abbreviation mean_fst
## 1               Lom          BUL     0.13
## 2 Sakhumi, Abkhazia          GES       NA
## 3         Satu Mare          ROS     0.15
## 4             Sochi          SOC     0.15
## 5          Novi Sad          SER     0.16
## 6            Aliaga          TUA     0.12
library(flextable)
library(officer)
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
  font.family = "Arial",
  font.size = 9,
  big.mark = ",",
  theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)

# Then create the flextable object
flex_table <- flextable(fst4) |>
  set_caption(caption = as_paragraph(
    as_chunk(
      "Table 1. Fst values for European microsatellite data",
      props = fp_text_default(color = "#000000", font.size = 14)
    )
  ),
  fp_p = fp_par(text.align = "center", padding = 5))

# Print the flextable
flex_table
Table 1. Fst values for European microsatellite data

Region

mean_fst_by_region

Country

mean_fst_by_country

Pop_City

Abbreviation

mean_fst

Eastern Europe

0.13

Bulgaria

0.13

Lom

BUL

0.13

Eastern Europe

0.13

Georgia

Sakhumi, Abkhazia

GES

Eastern Europe

0.13

Romania

0.15

Satu Mare

ROS

0.15

Eastern Europe

0.13

Russia

0.15

Sochi

SOC

0.15

Eastern Europe

0.13

Serbia

0.16

Novi Sad

SER

0.16

Eastern Europe

0.13

Turkey

0.12

Aliaga

TUA

0.12

Eastern Europe

0.13

Turkey

0.12

Hopa

TUH

0.11

Southern Europe

0.13

Albania

0.13

Durres

ALD

0.13

Southern Europe

0.13

Croatia

0.14

Dubrovnik

CRO

0.14

Southern Europe

0.13

Greece

0.13

Athens

GRA

0.12

Southern Europe

0.13

Greece

0.13

Chania

GRC

0.14

Southern Europe

0.13

Italy

0.12

Bologna

ITB

0.11

Southern Europe

0.13

Italy

0.12

Puglia

ITP

0.13

Southern Europe

0.13

Italy

0.12

Rome

ITR

Southern Europe

0.13

Malta

0.10

Luqa

MAL

0.10

Southern Europe

0.13

Portugal

0.13

Penafiel

POP

0.13

Southern Europe

0.13

Slovenia

0.09

Ajdovscina

SLO

0.09

Southern Europe

0.13

Spain

0.15

Badajoz

SPB

0.14

Southern Europe

0.13

Spain

0.15

Catarroja

SPC

0.15

Southern Europe

0.13

Spain

0.15

Cornella del Llobregat

BAR

Southern Europe

0.13

Spain

0.15

Magaluf

SPM

0.14

Southern Europe

0.13

Spain

0.15

San Roque

SPS

0.16

Western Europe

0.12

France

0.12

Saint-Martin-d'Heres

FRS

0.14

Western Europe

0.12

France

0.12

Strasbourg

STS

0.10

# Initialize Word document
doc <- 
  read_docx() |>
  body_add_flextable(value = flex_table)

# Define the output path with 'here' library
output_path <- here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_Europe_microsats_table.docx"
  )

# Save the Word document
print(doc, target = output_path)

To make scatter plot

# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
  dplyr::group_by(Country) |>
  dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))

# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]

# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)

# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)

# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)

ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
  geom_point(aes(color = Country), size = 3) +
  geom_line(aes(y = fitted_values), color = "blue") +  # Fitted line
  labs(
    title = "Mean Fst by Country",
    x = "Ordered Countries",
    y = "Mean Fst Value"
  ) +
  scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(legend.position = "none")

Save it

ggsave(
  filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/mean_fst_by_country_microsats_overlap.pdf"),
  width = 10,
  height = 6,
  units = "in"
)

1.3 Estimate distances

Estimate distances

library(geosphere)
# Grab the population names from the matrix aa
populations_with_fst <- colnames(all_fst_matrix)

# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)

# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation

# Calculate the distances
for (i in 1:n) {
  for (j in 1:n) {
    if (i != j) {
      coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
      coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
      distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
    }
  }
}
# Print the distance matrix
head(distance_matrix)
##           FRS       STS       POP       SPB       SPS       SPC       SPM
## FRS    0.0000  412.1522 1225.4383 1263.7518 1371.4448  817.6702  683.8845
## STS  412.1522    0.0000 1509.1745 1601.1573 1750.4874 1213.5639 1095.6072
## POP 1225.4383 1509.1745    0.0000  282.8409  614.5101  701.9619  939.4811
## SPB 1263.7518 1601.1573  282.8409    0.0000  331.7685  571.0509  822.8201
## SPS 1371.4448 1750.4874  614.5101  331.7685    0.0000  566.5153  787.3372
## SPC  817.6702 1213.5639  701.9619  571.0509  566.5153    0.0000  251.7726
##           ITB      ITP      MAL       SLO      CRO      ALD      GRC      GRA
## FRS  448.0871 1004.474 1269.904  628.7258 1037.845 1192.210 1877.379 1709.174
## STS  536.0256 1098.126 1522.503  546.2343 1052.803 1230.239 1974.712 1763.684
## POP 1644.8695 2102.566 2066.730 1851.8481 2197.100 2319.742 2876.039 2777.366
## SPB 1643.6694 2040.301 1923.429 1869.2123 2157.626 2262.516 2760.206 2686.566
## SPS 1690.3169 2002.327 1785.308 1929.6259 2144.409 2225.649 2642.916 2601.355
## SPC 1124.6677 1474.434 1368.139 1363.2153 1600.981 1697.515 2193.176 2115.539
##           SER      ROS      BUL      TUA      TUH      SOC
## FRS 1100.2591 1341.427 1392.500 1884.818 2897.030 2688.509
## STS  988.6774 1124.852 1304.105 1885.544 2745.357 2511.584
## POP 2315.5228 2566.256 2591.385 3000.557 4103.736 3906.072
## SPB 2313.3538 2594.045 2569.099 2924.845 4076.402 3894.554
## SPS 2340.8167 2651.131 2568.416 2854.982 4053.790 3891.912
## SPC 1780.6744 2085.530 2019.617 2354.477 3518.971 3346.038
saveRDS(distance_matrix, here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/distance_matrix.rds"))

Load and check the Fst values

distance_matrix <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/distance_matrix.rds"))

LD2_df <- read_csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/LD2_df_01_overlap.csv")
## New names:
## Rows: 24 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (23): SOC, GES, STS, ITR, GRC, BAR, BUL, CRO, GRA, ITB, MAL, SPM,
## TUA, T... lgl (1): SPS
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
head(LD2_df)
## # A tibble: 6 × 25
##   ...1      SOC    GES     STS     ITR    GRC   BAR   BUL   CRO   GRA   ITB
##   <chr>   <dbl>  <dbl>   <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SOC   NA      NA     NA      NA      NA        NA    NA    NA    NA    NA
## 2 GES    0.0113 NA     NA      NA      NA        NA    NA    NA    NA    NA
## 3 STS    0.106   0.109 NA      NA      NA        NA    NA    NA    NA    NA
## 4 ITR    0.0955  0.101  0.0398 NA      NA        NA    NA    NA    NA    NA
## 5 GRC    0.117   0.122  0.0743  0.0571 NA        NA    NA    NA    NA    NA
## 6 BAR    0.136   0.143  0.0927  0.0655  0.101    NA    NA    NA    NA    NA
## # ℹ 14 more variables: MAL <dbl>, SPM <dbl>, TUA <dbl>, TUH <dbl>, ALD <dbl>,
## #   FRS <dbl>, ITP <dbl>, POP <dbl>, ROS <dbl>, SER <dbl>, SLO <dbl>,
## #   SPC <dbl>, SPB <dbl>, SPS <lgl>
LD2_df <- LD2_df %>% remove_rownames %>% column_to_rownames(var="...1")

Convert the data into a matrix.

aa <- as.matrix(LD2_df)
aa[upper.tri(aa)] <- t(aa)[upper.tri(t(aa))]
head(aa)
##            SOC        GES        STS        ITR        GRC        BAR
## SOC         NA 0.01126978 0.10591967 0.09551819 0.11671692 0.13602044
## GES 0.01126978         NA 0.10927914 0.10057375 0.12208159 0.14270495
## STS 0.10591967 0.10927914         NA 0.03982659 0.07434949 0.09268239
## ITR 0.09551819 0.10057375 0.03982659         NA 0.05710568 0.06549046
## GRC 0.11671692 0.12208159 0.07434949 0.05710568         NA 0.10094517
## BAR 0.13602044 0.14270495 0.09268239 0.06549046 0.10094517         NA
##            BUL        CRO        GRA        ITB        MAL        SPM
## SOC 0.12832355 0.11495017 0.10889177 0.13371682 0.09458982 0.12205750
## GES 0.13384104 0.12036716 0.11376844 0.13892193 0.10125102 0.12971049
## STS 0.07799253 0.06354859 0.07077473 0.07120783 0.03980952 0.06532393
## ITR 0.05392586 0.04949314 0.04798060 0.06838917 0.01702597 0.06977034
## GRC 0.09338818 0.05248230 0.02417956 0.09782363 0.05730552 0.09532555
## BAR 0.10885970 0.09393694 0.09225317 0.12946121 0.07823715 0.11048101
##            TUA        TUH        ALD        FRS        ITP        POP
## SOC 0.11826743 0.10435177 0.13932531 0.10279814 0.10070244 0.09884633
## GES 0.12401729 0.11137798 0.14452571 0.10779885 0.10598420 0.10461590
## STS 0.07272436 0.05322703 0.09733023 0.03009784 0.04109490 0.04320132
## ITR 0.03907520 0.03526920 0.08060398 0.03984728 0.02050277 0.03645268
## GRC 0.07973727 0.07020895 0.07388877 0.07086343 0.06244349 0.06910929
## BAR 0.06317696 0.04376254 0.12083140 0.08889532 0.08100898 0.08718046
##            ROS       SER        SLO        SPC        SPB        SPS
## SOC 0.13423764 0.2267217 0.09140753 0.12576249 0.11561471 0.15080720
## GES 0.13912010 0.2329000 0.09665973 0.13312607 0.12082033 0.15814054
## STS 0.07530389 0.1752962 0.04446887 0.06818588 0.06120598 0.09859755
## ITR 0.07758959 0.1595922 0.04133080 0.08683449 0.07820355 0.07085827
## GRC 0.10336915 0.1893113 0.05999989 0.10519477 0.09488816 0.10597149
## BAR 0.12469756 0.2177834 0.07855252 0.12926803 0.12167900 0.11394015

Order

order_pops <- as.vector(sampling_loc$Abbreviation)
order_pops
##  [1] "FRS" "STS" "POP" "SPB" "SPS" "SPC" "SPM" "BAR" "ITB" "ITR" "ITP" "MAL"
## [13] "SLO" "CRO" "ALD" "GRC" "GRA" "SER" "ROS" "BUL" "TUA" "TUH" "SOC" "GES"

Create vector with order of populations

# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df)

# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]

#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it

# Print the reordered populations
print(poporder)
##  [1] "SOC" "GES" "STS" "ITR" "GRC" "BAR" "BUL" "CRO" "GRA" "ITB" "MAL" "SPM"
## [13] "TUA" "TUH" "ALD" "FRS" "ITP" "POP" "ROS" "SER" "SLO" "SPC" "SPB" "SPS"

Lets check if the matrix is symmetric.

isSymmetric(aa)
## [1] TRUE

Order the matrix using poporder. We will also add NA on the upper left side of the matrix.

aa <- aa[order_pops, order_pops]
aa[lower.tri(aa)] <- NA

Now we have to convert the matrix to a data frame to plot it with ggplot.

pairfst.long <- melt(aa)
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
summary(pairfst.long)
##        X1            X2          value        
##  FRS    : 24   FRS    : 24   Min.   :0.00626  
##  STS    : 24   STS    : 24   1st Qu.:0.06090  
##  POP    : 24   POP    : 24   Median :0.08701  
##  SPB    : 24   SPB    : 24   Mean   :0.09085  
##  SPS    : 24   SPS    : 24   3rd Qu.:0.10948  
##  SPC    : 24   SPC    : 24   Max.   :0.23859  
##  (Other):432   (Other):432   NA's   :300

Compare distance and FST

# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]

# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0

# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
#saveRDS(data, here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/data_aa.rds"))
#data <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/data_aa.rds"))

# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))

data <- data |>
  dplyr::arrange(
    Distance
  )
head(data) 
##   Distance FST row_index col_index
## 1        0   0       FRS       FRS
## 2        0   0       STS       STS
## 3        0   0       POP       POP
## 4        0   0       SPB       SPB
## 5        0   0       SPS       SPS
## 6        0   0       SPC       SPC

Fit linear regression

data <- data[data$Distance > 0, ]

# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)

# source the plotting function
source(here("analyses", "my_theme2.R"))


# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
 annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
  labs(title = "FST vs Distance - All populations",
       x = "Log(Distance)",
       y = "FST(1-FST)") +
  scale_x_continuous(labels = scales::comma) + 
  theme_classic() +
  theme(axis.text.x = element_text(size = 14),  # Increase font size for x-axis
        axis.text.y = element_text(size = 14                               ))  # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'

ggsave(
  filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_by_distance_microsats_overlap_with_equ.pdf"),
  width = 6,
  height = 4,
  units = "in"
)

Without equation

data <- data[data$Distance > 0, ]

# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)

# source the plotting function
source(here("analyses", "my_theme2.R"))


# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
# annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, #r2_text, sep = "\n"), size = 4, color = "black") +
  labs(title = "FST vs Distance - All populations",
       x = "Log(Distance)",
       y = "FST(1-FST)") +
  scale_x_continuous(labels = scales::comma) + 
  theme_classic() +
  theme(axis.text.x = element_text(size = 14),  # Increase font size for x-axis
        axis.text.y = element_text(size = 14                               ))  # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'

ggsave(
  filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_by_distance_microsats_overlap.pdf"),
  width = 6,
  height = 4,
  units = "in"
)

We can merge the FST and distance matrices

# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)

# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]

# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names

# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]

# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]

# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
  for(j in 1:ncol(merged_matrix)) {
    if (i < j) {
      # Upper triangular - Fst values with two decimal places
      merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
    } else if (i > j) {
      # Lower triangular - Distance values with zero decimal places
      merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
    }
  }
}

# Now the merged_matrix should be formatted as you need
print(merged_matrix)
##     ALD    BAR    BUL    CRO    FRS    GES    GRA    GRC    ITB    ITP   
## ALD NA     "0.12" "0.11" "0.05" "0.09" "0.14" "0.08" "0.07" "0.12" "0.08"
## BAR "1453" NA     "0.11" "0.09" "0.09" "0.14" "0.09" "0.10" "0.13" "0.08"
## BUL "414"  "1749" NA     "0.09" "0.07" "0.13" "0.09" "0.09" "0.10" "0.06"
## CRO "180"  "1340" "428"  NA     "0.06" "0.12" "0.06" "0.05" "0.09" "0.05"
## FRS "1192" "518"  "1393" "1038" NA     "0.11" "0.07" "0.07" "0.06" "0.04"
## GES "1770" "3175" "1426" "1845" "2794" NA     "0.11" "0.12" "0.14" "0.11"
## GRA "534"  "1906" "656"  "711"  "1709" "1540" NA     "0.02" "0.10" "0.05"
## GRC "754"  "2012" "925"  "934"  "1877" "1675" "270"  NA     "0.10" "0.06"
## ITB "752"  "832"  "950"  "591"  "448"  "2365" "1279" "1467" NA     "0.07"
## ITP "224"  "1233" "604"  "201"  "1004" "1991" "705"  "884"  "583"  NA    
## ITR "585"  "869"  "899"  "477"  "654"  "2321" "1070" "1224" "302"  "371" 
## MAL "746"  "1237" "1157" "817"  "1270" "2395" "873"  "862"  "996"  "621" 
## POP "2320" "872"  "2591" "2197" "1225" "4010" "2777" "2876" "1645" "2103"
## ROS "771"  "1791" "445"  "683"  "1341" "1496" "1100" "1370" "961"  "884" 
## SER "442"  "1496" "316"  "322"  "1100" "1695" "884"  "1141" "672"  "520" 
## SLO "689"  "1066" "782"  "509"  "629"  "2166" "1220" "1440" "245"  "585" 
## SOC "1678" "3075" "1327" "1748" "2689" "109"  "1470" "1618" "2262" "1898"
## SPB "2263" "820"  "2569" "2158" "1264" "3995" "2687" "2760" "1644" "2040"
## SPC "1698" "303"  "2020" "1601" "818"  "3445" "2116" "2193" "1125" "1474"
## SPM "1450" "209"  "1782" "1360" "684"  "3204" "1864" "1944" "916"  "1227"
## SPS "2226" "867"  "2568" "2144" "1371" "3988" "2601" "2643" "1690" "2002"
## STS "1230" "922"  "1304" "1053" "412"  "2620" "1764" "1975" "536"  "1098"
## TUA "694"  "2129" "641"  "851"  "1885" "1265" "277"  "445"  "1441" "901" 
## TUH "1828" "3258" "1512" "1919" "2897" "194"  "1544" "1650" "2461" "2051"
##     ITR    MAL    POP    ROS    SER    SLO    SOC    SPB    SPC    SPM   
## ALD "0.08" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BAR "0.07" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BUL "0.05" "0.06" "0.07" "0.10" "0.20" "0.07" "0.13" "0.10" "0.10" "0.10"
## CRO "0.05" "0.06" "0.06" "0.10" "0.17" "0.06" "0.11" "0.09" "0.10" "0.09"
## FRS "0.04" "0.04" "0.04" "0.07" "0.16" "0.04" "0.10" "0.06" "0.07" "0.06"
## GES "0.10" "0.10" "0.10" "0.14" "0.23" "0.10" "0.01" "0.12" "0.13" "0.13"
## GRA "0.05" "0.05" "0.07" "0.10" "0.19" "0.05" "0.11" "0.09" "0.10" "0.09"
## GRC "0.06" "0.06" "0.07" "0.10" "0.19" "0.06" "0.12" "0.09" "0.11" "0.10"
## ITB "0.07" "0.06" "0.05" "0.10" "0.22" "0.06" "0.13" "0.08" "0.09" "0.09"
## ITP "0.02" "0.02" "0.04" "0.08" "0.17" "0.04" "0.10" "0.07" "0.08" "0.06"
## ITR NA     "0.02" "0.04" "0.08" "0.16" "0.04" "0.10" "0.08" "0.09" "0.07"
## MAL "694"  NA     "0.03" "0.07" "0.16" "0.04" "0.09" "0.07" "0.07" "0.06"
## POP "1734" "2067" NA     "0.07" "0.17" "0.04" "0.10" "0.05" "0.05" "0.05"
## ROS "1047" "1498" "2566" NA     "0.20" "0.06" "0.13" "0.09" "0.10" "0.09"
## SER "697"  "1139" "2316" "367"  NA     "0.15" "0.23" "0.20" "0.22" "0.21"
## SLO "455"  "1118" "1852" "726"  "476"  NA     "0.09" "0.05" "0.06" "0.05"
## SOC "2224" "2316" "3906" "1388" "1591" "2060" NA     "0.12" "0.13" "0.12"
## SPB "1683" "1923" "283"  "2594" "2313" "1869" "3895" NA     "0.01" "0.01"
## SPC "1124" "1368" "702"  "2086" "1781" "1363" "3346" "571"  NA     "0.03"
## SPM "883"  "1128" "939"  "1873" "1555" "1160" "3107" "823"  "252"  NA    
## SPS "1669" "1785" "615"  "2651" "2341" "1930" "3892" "332"  "567"  "787" 
## STS "835"  "1523" "1509" "1125" "989"  "546"  "2512" "1601" "1214" "1096"
## TUA "1271" "1148" "3001" "1057" "932"  "1340" "1198" "2925" "2354" "2103"
## TUH "2395" "2412" "4104" "1629" "1797" "2272" "283"  "4076" "3519" "3275"
##     SPS    STS    TUA    TUH   
## ALD "0.14" "0.10" "0.10" "0.09"
## BAR "0.11" "0.09" "0.06" "0.04"
## BUL "0.12" "0.08" "0.08" "0.07"
## CRO "0.11" "0.06" "0.07" "0.07"
## FRS "0.10" "0.03" "0.07" "0.05"
## GES "0.16" "0.11" "0.12" "0.11"
## GRA "0.10" "0.07" "0.07" "0.06"
## GRC "0.11" "0.07" "0.08" "0.07"
## ITB "0.13" "0.07" "0.10" "0.08"
## ITP "0.09" "0.04" "0.06" "0.04"
## ITR "0.07" "0.04" "0.04" "0.04"
## MAL "0.07" "0.04" "0.05" "0.04"
## POP "0.09" "0.04" "0.06" "0.05"
## ROS "0.13" "0.08" "0.10" "0.08"
## SER "0.24" "0.18" "0.20" "0.17"
## SLO "0.09" "0.04" "0.06" "0.04"
## SOC "0.15" "0.11" "0.12" "0.10"
## SPB "0.13" "0.06" "0.10" "0.08"
## SPC "0.14" "0.07" "0.11" "0.08"
## SPM "0.08" "0.07" "0.09" "0.07"
## SPS NA     "0.10" "0.03" "0.09"
## STS "1750" NA     "0.07" "0.05"
## TUA "2855" "1886" NA     "0.05"
## TUH "4054" "2745" "1267" NA
cities <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.rds"))
cities <- as_tibble(cities)
head(cities)
## # A tibble: 6 × 7
##   Pop_City         Country Latitude Longitude Abbreviation Region order_microsat
##   <chr>            <chr>      <dbl>     <dbl> <chr>        <chr>           <int>
## 1 Lom              Bulgar…     43.8      23.2 BUL          Easte…             75
## 2 Sakhumi, Abkhaz… Georgia     43.1      40.9 GES          Easte…             94
## 3 Satu Mare        Romania     47.8      22.9 ROS          Easte…             71
## 4 Sochi            Russia      43.6      39.7 SOC          Easte…             92
## 5 Novi Sad         Serbia      45.3      19.8 SER          Easte…             68
## 6 Aliaga           Turkey      38.8      26.9 TUA          Easte…             76

We can sort by distance

# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)

# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))

# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]

# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance

merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]

# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
  for(j in 1:ncol(merged_matrix)) {
    if (i < j) {
      merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
    } else if (i > j) {
      merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
    }
  }
}

# Print the merged matrix
print(merged_matrix)
##     CRO    ITP    ITR    ALD    ITB    SLO    SER    BUL    FRS    MAL   
## CRO NA     "0.05" "0.05" "0.05" "0.09" "0.06" "0.17" "0.09" "0.06" "0.06"
## ITP "201"  NA     "0.02" "0.08" "0.07" "0.04" "0.17" "0.06" "0.04" "0.02"
## ITR "477"  "371"  NA     "0.08" "0.07" "0.04" "0.16" "0.05" "0.04" "0.02"
## ALD "180"  "224"  "585"  NA     "0.12" "0.08" "0.22" "0.11" "0.09" "0.08"
## ITB "591"  "583"  "302"  "752"  NA     "0.06" "0.22" "0.10" "0.06" "0.06"
## SLO "509"  "585"  "455"  "689"  "245"  NA     "0.15" "0.07" "0.04" "0.04"
## SER "322"  "520"  "697"  "442"  "672"  "476"  NA     "0.20" "0.16" "0.16"
## BUL "428"  "604"  "899"  "414"  "950"  "782"  "316"  NA     "0.07" "0.06"
## FRS "1038" "1004" "654"  "1192" "448"  "629"  "1100" "1393" NA     "0.04"
## MAL "817"  "621"  "694"  "746"  "996"  "1118" "1139" "1157" "1270" NA    
## ROS "683"  "884"  "1047" "771"  "961"  "726"  "367"  "445"  "1341" "1498"
## GRA "711"  "705"  "1070" "534"  "1279" "1220" "884"  "656"  "1709" "873" 
## STS "1053" "1098" "835"  "1230" "536"  "546"  "989"  "1304" "412"  "1523"
## BAR "1340" "1233" "869"  "1453" "832"  "1066" "1496" "1749" "518"  "1237"
## SPM "1360" "1227" "883"  "1450" "916"  "1160" "1555" "1782" "684"  "1128"
## TUA "851"  "901"  "1271" "694"  "1441" "1340" "932"  "641"  "1885" "1148"
## GRC "934"  "884"  "1224" "754"  "1467" "1440" "1141" "925"  "1877" "862" 
## SPC "1601" "1474" "1124" "1698" "1125" "1363" "1781" "2020" "818"  "1368"
## SPB "2158" "2040" "1683" "2263" "1644" "1869" "2313" "2569" "1264" "1923"
## SPS "2144" "2002" "1669" "2226" "1690" "1930" "2341" "2568" "1371" "1785"
## POP "2197" "2103" "1734" "2320" "1645" "1852" "2316" "2591" "1225" "2067"
## SOC "1748" "1898" "2224" "1678" "2262" "2060" "1591" "1327" "2689" "2316"
## GES "1845" "1991" "2321" "1770" "2365" "2166" "1695" "1426" "2794" "2395"
## TUH "1919" "2051" "2395" "1828" "2461" "2272" "1797" "1512" "2897" "2412"
##     ROS    GRA    STS    BAR    SPM    TUA    GRC    SPC    SPB    SPS   
## CRO "0.10" "0.06" "0.06" "0.09" "0.09" "0.07" "0.05" "0.10" "0.09" "0.11"
## ITP "0.08" "0.05" "0.04" "0.08" "0.06" "0.06" "0.06" "0.08" "0.07" "0.09"
## ITR "0.08" "0.05" "0.04" "0.07" "0.07" "0.04" "0.06" "0.09" "0.08" "0.07"
## ALD "0.12" "0.08" "0.10" "0.12" "0.11" "0.10" "0.07" "0.13" "0.12" "0.14"
## ITB "0.10" "0.10" "0.07" "0.13" "0.09" "0.10" "0.10" "0.09" "0.08" "0.13"
## SLO "0.06" "0.05" "0.04" "0.08" "0.05" "0.06" "0.06" "0.06" "0.05" "0.09"
## SER "0.20" "0.19" "0.18" "0.22" "0.21" "0.20" "0.19" "0.22" "0.20" "0.24"
## BUL "0.10" "0.09" "0.08" "0.11" "0.10" "0.08" "0.09" "0.10" "0.10" "0.12"
## FRS "0.07" "0.07" "0.03" "0.09" "0.06" "0.07" "0.07" "0.07" "0.06" "0.10"
## MAL "0.07" "0.05" "0.04" "0.08" "0.06" "0.05" "0.06" "0.07" "0.07" "0.07"
## ROS NA     "0.10" "0.08" "0.12" "0.09" "0.10" "0.10" "0.10" "0.09" "0.13"
## GRA "1100" NA     "0.07" "0.09" "0.09" "0.07" "0.02" "0.10" "0.09" "0.10"
## STS "1125" "1764" NA     "0.09" "0.07" "0.07" "0.07" "0.07" "0.06" "0.10"
## BAR "1791" "1906" "922"  NA     "0.11" "0.06" "0.10" "0.13" "0.12" "0.11"
## SPM "1873" "1864" "1096" "209"  NA     "0.09" "0.10" "0.03" "0.01" "0.08"
## TUA "1057" "277"  "1886" "2129" "2103" NA     "0.08" "0.11" "0.10" "0.03"
## GRC "1370" "270"  "1975" "2012" "1944" "445"  NA     "0.11" "0.09" "0.11"
## SPC "2086" "2116" "1214" "303"  "252"  "2354" "2193" NA     "0.01" "0.14"
## SPB "2594" "2687" "1601" "820"  "823"  "2925" "2760" "571"  NA     "0.13"
## SPS "2651" "2601" "1750" "867"  "787"  "2855" "2643" "567"  "332"  NA    
## POP "2566" "2777" "1509" "872"  "939"  "3001" "2876" "702"  "283"  "615" 
## SOC "1388" "1470" "2512" "3075" "3107" "1198" "1618" "3346" "3895" "3892"
## GES "1496" "1540" "2620" "3175" "3204" "1265" "1675" "3445" "3995" "3988"
## TUH "1629" "1544" "2745" "3258" "3275" "1267" "1650" "3519" "4076" "4054"
##     POP    SOC    GES    TUH   
## CRO "0.06" "0.11" "0.12" "0.07"
## ITP "0.04" "0.10" "0.11" "0.04"
## ITR "0.04" "0.10" "0.10" "0.04"
## ALD "0.09" "0.14" "0.14" "0.09"
## ITB "0.05" "0.13" "0.14" "0.08"
## SLO "0.04" "0.09" "0.10" "0.04"
## SER "0.17" "0.23" "0.23" "0.17"
## BUL "0.07" "0.13" "0.13" "0.07"
## FRS "0.04" "0.10" "0.11" "0.05"
## MAL "0.03" "0.09" "0.10" "0.04"
## ROS "0.07" "0.13" "0.14" "0.08"
## GRA "0.07" "0.11" "0.11" "0.06"
## STS "0.04" "0.11" "0.11" "0.05"
## BAR "0.09" "0.14" "0.14" "0.04"
## SPM "0.05" "0.12" "0.13" "0.07"
## TUA "0.06" "0.12" "0.12" "0.05"
## GRC "0.07" "0.12" "0.12" "0.07"
## SPC "0.05" "0.13" "0.13" "0.08"
## SPB "0.05" "0.12" "0.12" "0.08"
## SPS "0.09" "0.15" "0.16" "0.09"
## POP NA     "0.10" "0.10" "0.05"
## SOC "3906" NA     "0.01" "0.10"
## GES "4010" "109"  NA     "0.11"
## TUH "4104" "283"  "194"  NA

Make a table and save it as a word document

# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)

# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]

merged_df1 <- as.data.frame(merged_df)
write.csv(merged_df1, "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/merged_df_microsats_overlap.csv")

# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df)) 

ft

Population

CRO

ITP

ITR

ALD

ITB

SLO

SER

BUL

FRS

MAL

ROS

GRA

STS

BAR

SPM

TUA

GRC

SPC

SPB

SPS

POP

SOC

GES

TUH

CRO

0.05

0.05

0.05

0.09

0.06

0.17

0.09

0.06

0.06

0.10

0.06

0.06

0.09

0.09

0.07

0.05

0.10

0.09

0.11

0.06

0.11

0.12

0.07

ITP

201

0.02

0.08

0.07

0.04

0.17

0.06

0.04

0.02

0.08

0.05

0.04

0.08

0.06

0.06

0.06

0.08

0.07

0.09

0.04

0.10

0.11

0.04

ITR

477

371

0.08

0.07

0.04

0.16

0.05

0.04

0.02

0.08

0.05

0.04

0.07

0.07

0.04

0.06

0.09

0.08

0.07

0.04

0.10

0.10

0.04

ALD

180

224

585

0.12

0.08

0.22

0.11

0.09

0.08

0.12

0.08

0.10

0.12

0.11

0.10

0.07

0.13

0.12

0.14

0.09

0.14

0.14

0.09

ITB

591

583

302

752

0.06

0.22

0.10

0.06

0.06

0.10

0.10

0.07

0.13

0.09

0.10

0.10

0.09

0.08

0.13

0.05

0.13

0.14

0.08

SLO

509

585

455

689

245

0.15

0.07

0.04

0.04

0.06

0.05

0.04

0.08

0.05

0.06

0.06

0.06

0.05

0.09

0.04

0.09

0.10

0.04

SER

322

520

697

442

672

476

0.20

0.16

0.16

0.20

0.19

0.18

0.22

0.21

0.20

0.19

0.22

0.20

0.24

0.17

0.23

0.23

0.17

BUL

428

604

899

414

950

782

316

0.07

0.06

0.10

0.09

0.08

0.11

0.10

0.08

0.09

0.10

0.10

0.12

0.07

0.13

0.13

0.07

FRS

1038

1004

654

1192

448

629

1100

1393

0.04

0.07

0.07

0.03

0.09

0.06

0.07

0.07

0.07

0.06

0.10

0.04

0.10

0.11

0.05

MAL

817

621

694

746

996

1118

1139

1157

1270

0.07

0.05

0.04

0.08

0.06

0.05

0.06

0.07

0.07

0.07

0.03

0.09

0.10

0.04

ROS

683

884

1047

771

961

726

367

445

1341

1498

0.10

0.08

0.12

0.09

0.10

0.10

0.10

0.09

0.13

0.07

0.13

0.14

0.08

GRA

711

705

1070

534

1279

1220

884

656

1709

873

1100

0.07

0.09

0.09

0.07

0.02

0.10

0.09

0.10

0.07

0.11

0.11

0.06

STS

1053

1098

835

1230

536

546

989

1304

412

1523

1125

1764

0.09

0.07

0.07

0.07

0.07

0.06

0.10

0.04

0.11

0.11

0.05

BAR

1340

1233

869

1453

832

1066

1496

1749

518

1237

1791

1906

922

0.11

0.06

0.10

0.13

0.12

0.11

0.09

0.14

0.14

0.04

SPM

1360

1227

883

1450

916

1160

1555

1782

684

1128

1873

1864

1096

209

0.09

0.10

0.03

0.01

0.08

0.05

0.12

0.13

0.07

TUA

851

901

1271

694

1441

1340

932

641

1885

1148

1057

277

1886

2129

2103

0.08

0.11

0.10

0.03

0.06

0.12

0.12

0.05

GRC

934

884

1224

754

1467

1440

1141

925

1877

862

1370

270

1975

2012

1944

445

0.11

0.09

0.11

0.07

0.12

0.12

0.07

SPC

1601

1474

1124

1698

1125

1363

1781

2020

818

1368

2086

2116

1214

303

252

2354

2193

0.01

0.14

0.05

0.13

0.13

0.08

SPB

2158

2040

1683

2263

1644

1869

2313

2569

1264

1923

2594

2687

1601

820

823

2925

2760

571

0.13

0.05

0.12

0.12

0.08

SPS

2144

2002

1669

2226

1690

1930

2341

2568

1371

1785

2651

2601

1750

867

787

2855

2643

567

332

0.09

0.15

0.16

0.09

POP

2197

2103

1734

2320

1645

1852

2316

2591

1225

2067

2566

2777

1509

872

939

3001

2876

702

283

615

0.10

0.10

0.05

SOC

1748

1898

2224

1678

2262

2060

1591

1327

2689

2316

1388

1470

2512

3075

3107

1198

1618

3346

3895

3892

3906

0.01

0.10

GES

1845

1991

2321

1770

2365

2166

1695

1426

2794

2395

1496

1540

2620

3175

3204

1265

1675

3445

3995

3988

4010

109

0.11

TUH

1919

2051

2395

1828

2461

2272

1797

1512

2897

2412

1629

1544

2745

3258

3275

1267

1650

3519

4076

4054

4104

283

194

1.4 Mantel test with microsats overlapping with SNPs

fam_data <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/family_info_microsats.csv")

Merge

# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
  left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))

head(loc_albo)
##   IndividualID FamilyID   Loc1   Loc2   Loc3   Loc4   Loc5   Loc6   Loc7   Loc8
## 1   POP-PTPN01      POP 180183 209222 166166 141184 256258 196205 223229 129137
## 2   POP-PTPN02      POP 180183 209209 177177 184184 256256 205205 219225 129140
## 3   POP-PTPN03      POP 180183 209216 165166 141193 256256 205205 221223 129137
## 4   POP-PTPN04      POP 183183 209222 166177 184193 256256 205205 221225 129129
## 5   POP-PTPN05      POP 180180 209209 169177 184184 258262 205205 221221 129129
## 6   POP-PTPN06      POP 180183 204209 171177 175184 252256 205205 221221 129137
##     Loc9  Loc10  Loc11 Pop_City  Country Latitude Longitude          Region
## 1 172175 209209 159171 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 2 175175 209209 176176 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 3 172175 206206 159161 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 4 175175 206206 161165 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 5 172175 206206 168168 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 6 175175 206206 161161 Penafiel Portugal 41.18555 -8.329371 Southern Europe
##   order_microsat
## 1              3
## 2              3
## 3              3
## 4              3
## 5              3
## 6              3

Load .gen file with microsat data from all individuals

all_pops <- read.genepop("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen", ncode=3L)
## 
##  Converting data from a Genepop .gen file to a genind object... 
## 
## 
## File description:  ARBOMONITOR_Aedes_albopictus                                           
## 
## ...done.
all_pops
## /// GENIND OBJECT /////////
## 
##  // 637 individuals; 11 loci; 163 alleles; size: 486.7 Kb
## 
##  // Basic content
##    @tab:  637 x 163 matrix of allele counts
##    @loc.n.all: number of alleles per locus (range: 6-22)
##    @loc.fac: locus factor for the 163 columns of @tab
##    @all.names: list of allele names for each locus
##    @ploidy: ploidy of each individual  (range: 2-2)
##    @type:  codom
##    @call: read.genepop(file = "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen", 
##     ncode = 3L)
## 
##  // Optional content
##    @pop: population of each individual (group size range: 5-60)
pop(all_pops)
##   [1] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##   [7] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [13] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [19] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [25] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [31] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [37] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [43] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [49] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [55] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
##  [61] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11
##  [67] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 BUL-BULO34
##  [73] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [79] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [85] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [91] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
##  [97] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 ROS-ROSM30 ROS-ROSM30
## [103] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [109] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [115] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [121] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [127] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 SOC-RUSO10 SOC-RUSO10
## [133] SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10
## [139] SOC-RUSO10 SOC-RUSO10 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [145] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [151] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [157] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [163] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [169] SER-SRNO30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [175] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [181] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [187] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [193] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [199] TUA-TRLG30 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [205] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [211] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [217] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [223] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [229] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [235] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [241] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [247] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [253] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 CRO-CRPL30
## [259] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [265] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [271] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [277] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [283] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 GRC-GRCC25
## [289] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [295] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [301] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [307] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [313] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRA-GRAN08 GRA-GRAN08
## [319] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [325] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [331] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [337] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [343] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 ITP-ITVL09 ITP-ITVL09
## [349] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [355] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [361] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [367] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [373] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITR-ITRO17 ITR-ITRO17
## [379] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [385] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [391] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [397] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [403] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITB-ITBO15 ITB-ITBO15
## [409] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [415] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [421] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [427] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [433] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 MAL-MTLU39 MAL-MTLU39
## [439] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [445] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [451] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [457] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [463] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [469] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [475] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [481] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [487] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [493] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [499] SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [505] SPS-ESUP10 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [511] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [517] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [523] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [529] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [535] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [541] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 FRS-FRMH45 FRS-FRMH45
## [547] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [553] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [559] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [565] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [571] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 STS-FRST30 STS-FRST30
## [577] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [583] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [589] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [595] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [601] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 GES-ABSU05 GES-ABSU05
## [607] GES-ABSU05 GES-ABSU05 GES-ABSU05 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [613] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [619] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [625] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [631] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [637] BAR-ESBA28
## 24 Levels: POP-PTPN60 SPB-ESBD11 BUL-BULO34 ROS-ROSM30 ... BAR-ESBA28

Get the latitude and longitude

albo_dist2 <- cbind(loc_albo$Latitude, loc_albo$Longitude)
head(albo_dist2)
##          [,1]      [,2]
## [1,] 41.18555 -8.329371
## [2,] 41.18555 -8.329371
## [3,] 41.18555 -8.329371
## [4,] 41.18555 -8.329371
## [5,] 41.18555 -8.329371
## [6,] 41.18555 -8.329371
colnames(albo_dist2)<- c("x","y") 

Add jitter

albo_dist2 <- jitter(albo_dist2, factor = 1, amount = NULL)
head(albo_dist2)
##             x         y
## [1,] 41.18050 -8.327255
## [2,] 41.18858 -8.333034
## [3,] 41.17833 -8.326968
## [4,] 41.17941 -8.335236
## [5,] 41.17853 -8.328774
## [6,] 41.18248 -8.330608

Add to object

all_pops$other$xy <- albo_dist2

Convert the data

toto <- genind2genpop(all_pops)
## 
##  Converting data from a genind to a genpop object... 
## 
## ...done.

Get 1 mosquito per population, it is just to get the geographical coordinates

unique_populations <- unique(all_pops@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
  inds_in_pop <- which(all_pops@pop == unique_populations[i])
  selected_individuals[i] <- sample(inds_in_pop, 1)
}
all_pops_subset <- all_pops[selected_individuals, ]

Mantel test

library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-2
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(all_pops_subset$other$xy)

ibd <- mantel(
Dgen,
Dgeo,
method = "pearson",
permutations = 999,
strata = NULL,
na.rm = FALSE,
parallel = getOption("mc.cores")
)

Mantel statistic based on Pearson’s product-moment correlation

Call: mantel(xdis = Dgen, ydis = Dgeo, method = “pearson”, permutations = 999, strata = NULL, na.rm = FALSE, parallel = getOption(“mc.cores”))

Mantel statistic r: -0.008604 Significance: 0.517

Upper quantiles of permutations (null model): 90% 95% 97.5% 99% 0.148 0.191 0.231 0.276 Permutation: free Number of permutations: 999

plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)

Save Plot

# Plot it
# Start the PDF device
CairoPDF(here(
     "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/Genetic_v_Geog_distance_microsats_overlap.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)

# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared

# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)

text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)

dev.off()

Save Plot without equation

# Plot it
# Start the PDF device
CairoPDF(here(
      "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/Genetic_v_Geog_distance_microsats_overlap_noequ.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)

# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared

# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)

#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)

dev.off()
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
#myPal <-
#  colorRampPalette(c("white", "blue", "gold", "orange", "red"))
myPal <-
  colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared

# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)

# Adding the equation and R^2 to the plot
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)

title("Isolation by distance")

Save plot

library(MASS)
CairoPDF(here(
     "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/IDB_PlotFromMASS_density_microsats_overlap_purple.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
#myPal <-
  colorRampPalette(c("white", "blue", "gold", "orange", "red"))

myPal <-
 colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared

# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)

# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)

title("Isolation by distance")

dev.off()