Load libraries
## here() starts at /gpfs/gibbs/pi/caccone/mkc54/albo/scripts/RMarkdowns
## Loading required package: ade4
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
##
## Attaching package: 'adegenet'
## The following objects are masked from 'package:hierfstat':
##
## Hs, read.fstat
## Loading required package: ape
##
## Attaching package: 'ape'
## The following objects are masked from 'package:hierfstat':
##
## pcoa, varcomp
## Registered S3 method overwritten by 'pegas':
## method from
## print.amova ade4
##
## Attaching package: 'pegas'
## The following object is masked from 'package:ape':
##
## mst
## The following object is masked from 'package:ade4':
##
## amova
Microsatellite pops overlapping with SNP dataset with at least 4 individuals
fst_pops <- read.genepop("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen", ncode=3L)
##
## Converting data from a Genepop .gen file to a genind object...
##
##
## File description: ARBOMONITOR_Aedes_albopictus
##
## ...done.
## /// GENIND OBJECT /////////
##
## // 637 individuals; 11 loci; 163 alleles; size: 486.7 Kb
##
## // Basic content
## @tab: 637 x 163 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 6-22)
## @loc.fac: locus factor for the 163 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: read.genepop(file = "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen",
## ncode = 3L)
##
## // Optional content
## @pop: population of each individual (group size range: 5-60)
## [1] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [7] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [13] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [19] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [25] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [31] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [37] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [43] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [49] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [55] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [61] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11
## [67] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 BUL-BULO34
## [73] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [79] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [85] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [91] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [97] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 ROS-ROSM30 ROS-ROSM30
## [103] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [109] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [115] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [121] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [127] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 SOC-RUSO10 SOC-RUSO10
## [133] SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10
## [139] SOC-RUSO10 SOC-RUSO10 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [145] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [151] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [157] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [163] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [169] SER-SRNO30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [175] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [181] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [187] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [193] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [199] TUA-TRLG30 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [205] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [211] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [217] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [223] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [229] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [235] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [241] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [247] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [253] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 CRO-CRPL30
## [259] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [265] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [271] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [277] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [283] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 GRC-GRCC25
## [289] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [295] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [301] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [307] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [313] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRA-GRAN08 GRA-GRAN08
## [319] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [325] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [331] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [337] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [343] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 ITP-ITVL09 ITP-ITVL09
## [349] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [355] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [361] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [367] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [373] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITR-ITRO17 ITR-ITRO17
## [379] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [385] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [391] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [397] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [403] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITB-ITBO15 ITB-ITBO15
## [409] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [415] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [421] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [427] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [433] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 MAL-MTLU39 MAL-MTLU39
## [439] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [445] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [451] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [457] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [463] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [469] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [475] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [481] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [487] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [493] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [499] SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [505] SPS-ESUP10 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [511] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [517] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [523] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [529] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [535] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [541] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 FRS-FRMH45 FRS-FRMH45
## [547] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [553] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [559] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [565] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [571] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 STS-FRST30 STS-FRST30
## [577] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [583] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [589] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [595] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [601] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 GES-ABSU05 GES-ABSU05
## [607] GES-ABSU05 GES-ABSU05 GES-ABSU05 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [613] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [619] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [625] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [631] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [637] BAR-ESBA28
## 24 Levels: POP-PTPN60 SPB-ESBD11 BUL-BULO34 ROS-ROSM30 ... BAR-ESBA28
write.csv(Ds_2, file="/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/Nei_stats_fst_pops.csv")
WC84
write.csv(WC84_2, file="/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/WC84_stats_fst_pops_original.csv")
sampling_loc <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.csv")
saveRDS(sampling_loc, here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.rds"))
Import sample locations
#sampling_loc <- readRDS("sampling_loc_euro_microsats.rds")
sampling_loc <- readRDS("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.rds")
# Arrange by region
sampling_loc <- sampling_loc |>
dplyr::arrange(
order_microsat
)
# Check it
head(sampling_loc)
## Pop_City Country Latitude Longitude Abbreviation Region
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 FRS Western Europe
## 2 Strasbourg France 48.61124 7.754512 STS Western Europe
## 3 Penafiel Portugal 41.18555 -8.329371 POP Southern Europe
## 4 Badajoz Spain 38.86622 -6.974194 SPB Southern Europe
## 5 San Roque Spain 36.17042 -5.371530 SPS Southern Europe
## 6 Catarroja Spain 39.40294 -0.395514 SPC Southern Europe
## order_microsat
## 1 1
## 2 2
## 3 3
## 4 6
## 5 10
## 6 33
library(tidyverse)
all_fst_matrix <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/WC84_stats_fst_pops.csv", header=TRUE)
all_fst_matrix <- column_to_rownames(all_fst_matrix, var="X")
all_fst_matrix <- as.matrix(all_fst_matrix)
all_fst_matrix[upper.tri(all_fst_matrix)] <- t(all_fst_matrix)[upper.tri(t(all_fst_matrix))]
head(all_fst_matrix)
## POP SPB BUL ROS SOC SER TUA
## POP 0.00000000 0.09862327 0.10475031 0.1184234 0.1638067 0.14477361 0.09545614
## SPB 0.09862327 0.00000000 0.11080396 0.1016153 0.1628891 0.12372018 0.12771807
## BUL 0.10475031 0.11080396 0.00000000 0.1324089 0.1359418 0.06656334 0.14296227
## ROS 0.11842345 0.10161533 0.13240895 0.0000000 0.1705913 0.13843205 0.10154021
## SOC 0.16380668 0.16288910 0.13594183 0.1705913 0.0000000 0.16237093 0.13764053
## SER 0.14477361 0.12372018 0.06656334 0.1384320 0.1623709 0.00000000 0.14496649
## TUH ALD CRO GRC GRA ITP ITR
## POP 0.07122529 0.14835966 0.13989804 0.12628871 0.11239462 0.1225115 0.09741293
## SPB 0.11310079 0.13825518 0.12622455 0.12457941 0.08145219 0.1212394 0.06879304
## BUL 0.12891409 0.07227269 0.08677972 0.07301433 0.08114090 0.1267097 0.05136298
## ROS 0.08902702 0.12651578 0.13480471 0.13729104 0.13671367 0.1222302 0.09727574
## SOC 0.14855716 0.11023240 0.10621109 0.15220631 0.11982206 0.1387863 0.07051625
## SER 0.13990064 0.08496288 0.10286947 0.09753203 0.11969399 0.1374689 0.08499667
## ITB MAL SLO SPS SPC SPM FRS
## POP 0.09180241 0.09223906 0.08429084 0.11753684 0.11391168 0.10270469 0.1280323
## SPB 0.07812187 0.06400219 0.07761426 0.06371956 0.07106287 0.09913016 0.1410927
## BUL 0.04183915 0.09172075 0.03155298 0.11023396 0.14331553 0.13185030 0.1548117
## ROS 0.11880947 0.11433652 0.09505474 0.16599584 0.12032735 0.10467044 0.1551246
## SOC 0.11329893 0.06737795 0.09631422 0.19991036 0.16950400 0.10919955 0.1457662
## SER 0.06043377 0.11921096 0.05199941 0.16260205 0.15912775 0.16573689 0.1696733
## STS GES BAR
## POP 0.09155837 0.11811409 0.06470586
## SPB 0.08257750 0.15145547 0.02950121
## BUL 0.06398690 0.10369875 0.08961048
## ROS 0.10169035 0.17586549 0.07965325
## SOC 0.10336390 -0.01769868 0.14452586
## SER 0.07062897 0.15416050 0.11858534
## [1] TRUE
## [1] "POP" "POL" "SPB" "ESAC" "ESMO" "ESSV" "ESAL" "SPS" "ESBM" "ESTM"
## [11] "ESBN" "ESMC" "ESAB" "ESBY" "ESNI" "ESNU" "ESAG" "ESAZ" "ESCN" "ESCT"
## [21] "ESMU" "ESPP" "ESLM" "ESCA" "ESAY" "SPC" "ESBS" "ESBE" "ESTS" "ESBA"
## [31] "ESCP" "SPM" "FRS" "STS" "ITB" "ITRO" "SLO" "MAL" "ITP" "CRO"
## [41] "ALD" "SER" "ROTI" "RODE" "ROS" "GRTA" "BUL" "ROPL" "GRPA" "GRA"
## [51] "GRC" "GRKV" "ROBU" "TUA" "TRGN" "ROCO" "TRIS" "SOC" "TRTR" "RUPL"
## [61] "RUBE" "ABAL" "TRRI" "ABSU" "TUH" "GEPO" "TRAR"
Add NA on the upper left side of the matrix.
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## X1 X2 value
## POP : 67 POP : 67 Min. :0.0000
## POL : 67 POL : 67 1st Qu.:0.0998
## SPB : 67 SPB : 67 Median :0.1448
## ESAC : 67 ESAC : 67 Mean :0.1567
## ESMO : 67 ESMO : 67 3rd Qu.:0.2013
## ESSV : 67 ESSV : 67 Max. :0.5237
## (Other):4087 (Other):4087 NA's :2211
Now we have to convert the matrix to a data frame to plot it with ggplot.
pairfst.f <- ggplot(pairfst.long, aes(X1, X2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 4) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 0)
)
pairfst.f
Save it
all_fst_df <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/WC84_stats_fst_pops.csv", header=TRUE)
all_fst_df <- column_to_rownames(all_fst_df, var="X")
# Step 1: Map abbreviation to country
abbreviation_to_country <- sampling_loc %>% dplyr::select(Abbreviation, Country)
abbreviation_to_order <- sampling_loc %>% dplyr::select(Abbreviation, order_microsat)
# Step 2: Calculate mean Fst for each pair of countries
# Convert the matrix to a data frame and add row names as a new column
fst_df <- as.data.frame(as.matrix(all_fst_df))
fst_df$Abbreviation1 <- rownames(fst_df)
# Gather columns into rows
fst_long <- fst_df %>% gather(key = "Abbreviation2", value = "Fst", -Abbreviation1)
# Merge with country mapping
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation1", by.y = "Abbreviation")
fst_long <- merge(fst_long, abbreviation_to_country, by.x = "Abbreviation2", by.y = "Abbreviation", suffixes = c("_1", "_2"))
# Calculate mean Fst for each pair of countries
fst_summary <- fst_long %>%
group_by(Country_1, Country_2) %>%
summarize(Mean_Fst = mean(Fst, na.rm = TRUE), .groups = 'drop') %>%
filter(Country_1 != Country_2)
#save the fst values as csv
#write.csv(fst_summary, "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_summary_by_country_overlap.csv")
fst_summary_ordered <- here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_summary_ordered_overlap.txt")
#fst_summary_ordered <- data.frame(fst_summary_ordered)
fst_summary_ordered <- read.table(fst_summary_ordered, header=TRUE)
# Convert summary back to a matrix form, avoiding the use of tibbles for row names
fst_matrix_summary <- as.data.frame(spread(fst_summary_ordered, key = Country_2, value = Mean_Fst))
rownames(fst_matrix_summary) <- fst_matrix_summary$Country_1
fst_matrix_summary <- fst_matrix_summary[, -1]
fst_matrix_summary <- as.matrix(fst_matrix_summary)
# Make the matrix symmetric by averaging the off-diagonal elements
symmetric_fst_matrix <- matrix(nrow = nrow(fst_matrix_summary), ncol = ncol(fst_matrix_summary))
rownames(symmetric_fst_matrix) <- rownames(fst_matrix_summary)
colnames(symmetric_fst_matrix) <- colnames(fst_matrix_summary)
for(i in 1:nrow(fst_matrix_summary)) {
for(j in i:nrow(fst_matrix_summary)) {
if (i == j) {
symmetric_fst_matrix[i, j] <- fst_matrix_summary[i, j]
} else {
avg_value <- mean(c(fst_matrix_summary[i, j], fst_matrix_summary[j, i]), na.rm = TRUE)
symmetric_fst_matrix[i, j] <- avg_value
symmetric_fst_matrix[j, i] <- avg_value
}
}
}
# Check if the matrix is symmetric
# print(isSymmetric(symmetric_fst_matrix))
# Your symmetric Fst matrix by country is now in symmetric_fst_matrix
print(symmetric_fst_matrix)
## Albania Bulgaria Croatia France Georgia Greece
## Albania NA 0.07227269 0.04057776 0.08948333 0.11554526 0.07145213
## Bulgaria 0.07227269 NA 0.08677972 0.10939929 0.10369875 0.07707761
## Croatia 0.04057776 0.08677972 NA 0.11680090 0.11847108 0.09048210
## France 0.08948333 0.10939929 0.11680090 NA 0.12628339 0.09849714
## Georgia 0.11554526 0.10369875 0.11847108 0.12628339 NA 0.10644565
## Greece 0.07145213 0.07707761 0.09048210 0.09849714 0.10644565 NA
## Italy 0.07959606 0.07330393 0.08446501 0.07190402 0.09876712 0.08135587
## Malta 0.08442652 0.09172075 0.09097150 0.05900789 0.07969581 0.08991645
## Portugal 0.14835966 0.10475031 0.13989804 0.10979533 0.11811409 0.11934166
## Romania 0.12651578 0.13240895 0.13480471 0.12840747 0.17586549 0.13700235
## Russia 0.11023240 0.13594183 0.10621109 0.12456507 -0.01769868 0.13601419
## Serbia 0.08496288 0.06656334 0.10286947 0.12015114 0.15416050 0.10861301
## Slovenia 0.05805019 0.03155298 0.08929249 0.06578723 0.08383251 0.06200544
## Spain 0.13352057 0.11716285 0.14045223 0.10985579 0.13839704 0.11945354
## Turkey 0.12247859 0.13593818 0.13067456 0.06216445 0.13858295 0.13092924
## Italy Malta Portugal Romania Russia Serbia
## Albania 0.07959606 0.08442652 0.14835966 0.12651578 0.11023240 0.08496288
## Bulgaria 0.07330393 0.09172075 0.10475031 0.13240895 0.13594183 0.06656334
## Croatia 0.08446501 0.09097150 0.13989804 0.13480471 0.10621109 0.10286947
## France 0.07190402 0.05900789 0.10979533 0.12840747 0.12456507 0.12015114
## Georgia 0.09876712 0.07969581 0.11811409 0.17586549 -0.01769868 0.15416050
## Greece 0.08135587 0.08991645 0.11934166 0.13700235 0.13601419 0.10861301
## Italy NA 0.06182629 0.10390895 0.11277179 0.10753383 0.09429978
## Malta 0.06182629 NA 0.09223906 0.11433652 0.06737795 0.11921096
## Portugal 0.10390895 0.09223906 NA 0.11842345 0.16380668 0.14477361
## Romania 0.11277179 0.11433652 0.11842345 NA 0.17059132 0.13843205
## Russia 0.10753383 0.06737795 0.16380668 0.17059132 NA 0.16237093
## Serbia 0.09429978 0.11921096 0.14477361 0.13843205 0.16237093 NA
## Slovenia 0.05398682 0.04654500 0.08429084 0.09505474 0.09631422 0.05199941
## Spain 0.10144315 0.08366327 0.09949647 0.11445244 0.15720577 0.14595444
## Turkey 0.09068222 0.07784607 0.08334071 0.09528362 0.14309884 0.14243357
## Slovenia Spain Turkey
## Albania 0.05805019 0.13352057 0.12247859
## Bulgaria 0.03155298 0.11716285 0.13593818
## Croatia 0.08929249 0.14045223 0.13067456
## France 0.06578723 0.10985579 0.06216445
## Georgia 0.08383251 0.13839704 0.13858295
## Greece 0.06200544 0.11945354 0.13092924
## Italy 0.05398682 0.10144315 0.09068222
## Malta 0.04654500 0.08366327 0.07784607
## Portugal 0.08429084 0.09949647 0.08334071
## Romania 0.09505474 0.11445244 0.09528362
## Russia 0.09631422 0.15720577 0.14309884
## Serbia 0.05199941 0.14595444 0.14243357
## Slovenia NA 0.08402922 0.08021932
## Spain 0.08402922 NA 0.11995260
## Turkey 0.08021932 0.11995260 NA
# Read the file
country_order <- here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/country_order.txt")
country_order <- read.table(country_order,
header = FALSE,
col.names = c("country"))
order_countries <- as.vector(country_order$country)
order_countries
## [1] "France" "Portugal" "Spain" "Italy" "Malta" "Slovenia"
## [7] "Croatia" "Albania" "Greece" "Serbia" "Romania" "Bulgaria"
## [13] "Turkey" "Russia" "Georgia"
#write.csv(symmetric_fst_matrix, file = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/symmetric_fst_matrix_overlap.csv"))
symmetric_fst_matrix_ordered <- read_csv(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/symmetric_fst_matrix_overlap_ordered.csv")
## New names:
## Rows: 15 Columns: 16
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (15): France, Portugal, Spain, Italy, Malta, Slovenia, Croatia,
## Albania,...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
symmetric_fst_matrix_ordered<- as.data.frame(symmetric_fst_matrix_ordered) #convert to dataframe
rownames(symmetric_fst_matrix_ordered) <- symmetric_fst_matrix_ordered$...1 #use first column as rownames
symmetric_fst_matrix_ordered <- symmetric_fst_matrix_ordered[ -c(1) ] #remove 1st column
symmetric_fst_matrix_ordered <-as.matrix(symmetric_fst_matrix_ordered) #convert to matrix
#symmetric_fst_matrix_ordered <- symmetric_fst_matrix_ordered[poporder, poporder]
symmetric_fst_matrix_ordered[lower.tri(symmetric_fst_matrix_ordered)] <- NA
print(symmetric_fst_matrix_ordered)
## France Portugal Spain Italy Malta Slovenia Croatia
## France NA 0.1713433 0.1480517 0.07190402 0.05900789 0.06578723 0.11680090
## Portugal NA NA 0.2502794 0.18373532 0.15233714 0.16752995 0.19866296
## Spain NA NA NA 0.14122821 0.11810125 0.12005804 0.16661100
## Italy NA NA NA NA 0.06182629 0.05398682 0.08446501
## Malta NA NA NA NA NA 0.04654500 0.09097150
## Slovenia NA NA NA NA NA NA 0.08929249
## Croatia NA NA NA NA NA NA NA
## Albania NA NA NA NA NA NA NA
## Greece NA NA NA NA NA NA NA
## Serbia NA NA NA NA NA NA NA
## Romania NA NA NA NA NA NA NA
## Bulgaria NA NA NA NA NA NA NA
## Turkey NA NA NA NA NA NA NA
## Russia NA NA NA NA NA NA NA
## Georgia NA NA NA NA NA NA NA
## Albania Greece Serbia Romania Bulgaria Turkey
## France 0.08948333 0.11071192 0.12015114 0.09519542 0.10939929 0.07589234
## Portugal 0.22023894 0.19774973 0.23593387 0.16308682 0.20139278 0.17242399
## Spain 0.16293541 0.16109165 0.19369906 0.14816269 0.15849566 0.15743687
## Italy 0.07959606 0.08330830 0.09429978 0.08201868 0.07330393 0.08942815
## Malta 0.08442652 0.09156903 0.11921096 0.07089738 0.09172075 0.07583201
## Slovenia 0.05805019 0.06468557 0.05199941 0.06463287 0.03155298 0.06273769
## Croatia 0.04057776 0.07447962 0.10286947 0.12454840 0.08677972 0.12830928
## Albania NA 0.07039105 0.08496288 0.11371843 0.07227269 0.11864242
## Greece NA NA 0.11368631 0.09991009 0.07559310 0.11606726
## Serbia NA NA NA 0.13128964 0.06656334 0.12686711
## Romania NA NA NA NA 0.09300663 0.09196546
## Bulgaria NA NA NA NA NA 0.10974546
## Turkey NA NA NA NA NA NA
## Russia NA NA NA NA NA NA
## Georgia NA NA NA NA NA NA
## Russia Georgia
## France 0.12619748 0.12058407
## Portugal 0.22765544 0.23105780
## Spain 0.19406219 0.21018824
## Italy 0.12093466 0.12083105
## Malta 0.09170185 0.09195896
## Slovenia 0.11508794 0.09905471
## Croatia 0.13999132 0.15550494
## Albania 0.14151206 0.13212053
## Greece 0.13608300 0.13317193
## Serbia 0.18199287 0.18202927
## Romania 0.14682007 0.13190772
## Bulgaria 0.15604941 0.14253634
## Turkey 0.14398829 0.13028856
## Russia NA 0.08060513
## Georgia NA NA
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## X1 X2 value
## France : 15 France : 15 Min. :0.03155
## Portugal: 15 Portugal: 15 1st Qu.:0.08443
## Spain : 15 Spain : 15 Median :0.11680
## Italy : 15 Italy : 15 Mean :0.12026
## Malta : 15 Malta : 15 3rd Qu.:0.14816
## Slovenia: 15 Slovenia: 15 Max. :0.25028
## (Other) :135 (Other) :135 NA's :120
pairfst.f2 <- ggplot(pairfst.long2, aes(X1, X2)) +
geom_tile(aes(fill = value), colour = "white") +
scale_fill_gradient(
low = "white",
high = "#71b6ff",
name = "Fst",
na.value = "white",
limits = c(0, 0.5)
) +
scale_x_discrete(position = "top") +
theme_bw() +
geom_text(aes(label = ifelse(
is.na(value), "", formatC(value, digits = 2, format = "f")
)), size = 5) +
theme(
axis.text.x = element_text(angle = 90, hjust = 0),
axis.title = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.y = element_text(hjust = 1)
)
pairfst.f2
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_matrix_microsats_by_country_overlap.pdf"),
pairfst.f2,
width = 10,
height = 9,
units = "in"
)
Remove NAs and rename columns
# remove NAs
fst2 <-
pairfst.long |>
drop_na()
# rename columns
fst2 <-
fst2 |>
dplyr::rename(pop1 = 1,
pop2 = 2,
fst = 3)
# Split the data into two data frames, one for pop1 and one for pop2
df_pop1 <- fst2 |>
dplyr::select(pop = pop1, fst)
df_pop2 <- fst2 |>
dplyr::select(pop = pop2, fst)
# Combine the two data frames
df_combined <- bind_rows(df_pop1, df_pop2)
# Calculate the mean fst for each population
mean_fst <- df_combined |>
group_by(pop) |>
summarise(mean_fst = mean(fst))
print(mean_fst)
## # A tibble: 67 × 2
## pop mean_fst
## <fct> <dbl>
## 1 POP 0.127
## 2 POL 0.294
## 3 SPB 0.137
## 4 ESAC 0.270
## 5 ESMO 0.283
## 6 ESSV 0.158
## 7 ESAL 0.216
## 8 SPS 0.157
## 9 ESBM 0.196
## 10 ESTM 0.123
## # ℹ 57 more rows
Merge
## Pop_City Country Latitude Longitude Abbreviation Region
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 FRS Western Europe
## 2 Strasbourg France 48.61124 7.754512 STS Western Europe
## 3 Penafiel Portugal 41.18555 -8.329371 POP Southern Europe
## 4 Badajoz Spain 38.86622 -6.974194 SPB Southern Europe
## 5 San Roque Spain 36.17042 -5.371530 SPS Southern Europe
## 6 Catarroja Spain 39.40294 -0.395514 SPC Southern Europe
## order_microsat mean_fst
## 1 1 0.13991299
## 2 2 0.09604862
## 3 3 0.12709704
## 4 6 0.13741891
## 5 10 0.15705816
## 6 33 0.15086062
Mean by region
# Group by Region and calculate the mean_fst by Region
region_means <- fst3 |>
group_by(Region) |>
summarize(mean_fst_by_region = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_region column to the fst3 tibble
fst3 <- fst3 |>
left_join(region_means, by = "Region")
# Print the modified fst3 tibble
print(fst3)
## Pop_City Country Latitude Longitude Abbreviation
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 FRS
## 2 Strasbourg France 48.61124 7.754512 STS
## 3 Penafiel Portugal 41.18555 -8.329371 POP
## 4 Badajoz Spain 38.86622 -6.974194 SPB
## 5 San Roque Spain 36.17042 -5.371530 SPS
## 6 Catarroja Spain 39.40294 -0.395514 SPC
## 7 Magaluf Spain 39.50679 2.530729 SPM
## 8 Cornella del Llobregat Spain 41.35351 2.096833 BAR
## 9 Bologna Italy 44.48478 11.366584 ITB
## 10 Rome Italy 41.90215 12.517399 ITR
## 11 Puglia Italy 41.12213 16.844107 ITP
## 12 Luqa Malta 35.86053 14.487028 MAL
## 13 Ajdovscina Slovenia 45.88715 13.770997 SLO
## 14 Dubrovnik Croatia 42.60654 18.226612 CRO
## 15 Durres Albania 41.29704 19.503734 ALD
## 16 Chania Greece 35.51448 24.017960 GRC
## 17 Athens Greece 37.93719 23.946883 GRA
## 18 Novi Sad Serbia 45.25887 19.818778 SER
## 19 Satu Mare Romania 47.79147 22.890202 ROS
## 20 Lom Bulgaria 43.80489 23.236340 BUL
## 21 Aliaga Turkey 38.76390 26.944800 TUA
## 22 Hopa Turkey 41.38760 41.437800 TUH
## 23 Sochi Russia 43.60042 39.745328 SOC
## 24 Sakhumi, Abkhazia Georgia 43.07851 40.887588 GES
## Region order_microsat mean_fst mean_fst_by_region
## 1 Western Europe 1 0.13991299 0.12
## 2 Western Europe 2 0.09604862 0.12
## 3 Southern Europe 3 0.12709704 0.13
## 4 Southern Europe 6 0.13741891 0.13
## 5 Southern Europe 10 0.15705816 0.13
## 6 Southern Europe 33 0.15086062 0.13
## 7 Southern Europe 35 0.14201066 0.13
## 8 Southern Europe 42 NA 0.13
## 9 Southern Europe 51 0.11120606 0.13
## 10 Southern Europe 54 NA 0.13
## 11 Southern Europe 56 0.13237633 0.13
## 12 Southern Europe 57 0.09631054 0.13
## 13 Southern Europe 58 0.09265578 0.13
## 14 Southern Europe 59 0.13525714 0.13
## 15 Southern Europe 61 0.12900361 0.13
## 16 Southern Europe 63 0.14340327 0.13
## 17 Southern Europe 65 0.11737097 0.13
## 18 Eastern Europe 68 0.15541107 0.13
## 19 Eastern Europe 71 0.14552059 0.13
## 20 Eastern Europe 75 0.12566523 0.13
## 21 Eastern Europe 76 0.12285868 0.13
## 22 Eastern Europe 81 0.11307589 0.13
## 23 Eastern Europe 92 0.14701374 0.13
## 24 Eastern Europe 94 NA 0.13
Mean By country
# Group by Country and calculate the mean_fst by Country
country_means <- fst3 |>
group_by(Country) |>
summarize(mean_fst_by_country = round(mean(mean_fst, na.rm = TRUE), 2)) |>
ungroup() # Ungroup the data
# Add the mean_fst_by_country column to the fst3 tibble
fst3 <- fst3 |>
left_join(country_means, by = "Country")
# Print the modified fst3 tibble
print(sampling_loc)
## Pop_City Country Latitude Longitude Abbreviation
## 1 Saint-Martin-d'Heres France 45.16531 5.771806 FRS
## 2 Strasbourg France 48.61124 7.754512 STS
## 3 Penafiel Portugal 41.18555 -8.329371 POP
## 4 Badajoz Spain 38.86622 -6.974194 SPB
## 5 San Roque Spain 36.17042 -5.371530 SPS
## 6 Catarroja Spain 39.40294 -0.395514 SPC
## 7 Magaluf Spain 39.50679 2.530729 SPM
## 8 Cornella del Llobregat Spain 41.35351 2.096833 BAR
## 9 Bologna Italy 44.48478 11.366584 ITB
## 10 Rome Italy 41.90215 12.517399 ITR
## 11 Puglia Italy 41.12213 16.844107 ITP
## 12 Luqa Malta 35.86053 14.487028 MAL
## 13 Ajdovscina Slovenia 45.88715 13.770997 SLO
## 14 Dubrovnik Croatia 42.60654 18.226612 CRO
## 15 Durres Albania 41.29704 19.503734 ALD
## 16 Chania Greece 35.51448 24.017960 GRC
## 17 Athens Greece 37.93719 23.946883 GRA
## 18 Novi Sad Serbia 45.25887 19.818778 SER
## 19 Satu Mare Romania 47.79147 22.890202 ROS
## 20 Lom Bulgaria 43.80489 23.236340 BUL
## 21 Aliaga Turkey 38.76390 26.944800 TUA
## 22 Hopa Turkey 41.38760 41.437800 TUH
## 23 Sochi Russia 43.60042 39.745328 SOC
## 24 Sakhumi, Abkhazia Georgia 43.07851 40.887588 GES
## Region order_microsat
## 1 Western Europe 1
## 2 Western Europe 2
## 3 Southern Europe 3
## 4 Southern Europe 6
## 5 Southern Europe 10
## 6 Southern Europe 33
## 7 Southern Europe 35
## 8 Southern Europe 42
## 9 Southern Europe 51
## 10 Southern Europe 54
## 11 Southern Europe 56
## 12 Southern Europe 57
## 13 Southern Europe 58
## 14 Southern Europe 59
## 15 Southern Europe 61
## 16 Southern Europe 63
## 17 Southern Europe 65
## 18 Eastern Europe 68
## 19 Eastern Europe 71
## 20 Eastern Europe 75
## 21 Eastern Europe 76
## 22 Eastern Europe 81
## 23 Eastern Europe 92
## 24 Eastern Europe 94
fst4 <- fst3 |>
dplyr::select(
Region, mean_fst_by_region, Country, mean_fst_by_country, Pop_City, Abbreviation, mean_fst,
)
fst4 <- fst4 |>
arrange(
Region, Country, Pop_City
)
# Round
fst4 <- fst4 |>
mutate_if(is.numeric, ~ round(., 2))
head(fst4)
## Region mean_fst_by_region Country mean_fst_by_country
## 1 Eastern Europe 0.13 Bulgaria 0.13
## 2 Eastern Europe 0.13 Georgia NaN
## 3 Eastern Europe 0.13 Romania 0.15
## 4 Eastern Europe 0.13 Russia 0.15
## 5 Eastern Europe 0.13 Serbia 0.16
## 6 Eastern Europe 0.13 Turkey 0.12
## Pop_City Abbreviation mean_fst
## 1 Lom BUL 0.13
## 2 Sakhumi, Abkhazia GES NA
## 3 Satu Mare ROS 0.15
## 4 Sochi SOC 0.15
## 5 Novi Sad SER 0.16
## 6 Aliaga TUA 0.12
library(flextable)
library(officer)
# Set theme if you want to use something different from the previous table
set_flextable_defaults(
font.family = "Arial",
font.size = 9,
big.mark = ",",
theme_fun = "theme_zebra" # try the themes: theme_alafoli(), theme_apa(), theme_booktabs(), theme_box(), theme_tron_legacy(), theme_tron(), theme_vader(), theme_vanilla(), theme_zebra()
)
# Then create the flextable object
flex_table <- flextable(fst4) |>
set_caption(caption = as_paragraph(
as_chunk(
"Table 1. Fst values for European microsatellite data",
props = fp_text_default(color = "#000000", font.size = 14)
)
),
fp_p = fp_par(text.align = "center", padding = 5))
# Print the flextable
flex_table
Region | mean_fst_by_region | Country | mean_fst_by_country | Pop_City | Abbreviation | mean_fst |
---|---|---|---|---|---|---|
Eastern Europe | 0.13 | Bulgaria | 0.13 | Lom | BUL | 0.13 |
Eastern Europe | 0.13 | Georgia | Sakhumi, Abkhazia | GES | ||
Eastern Europe | 0.13 | Romania | 0.15 | Satu Mare | ROS | 0.15 |
Eastern Europe | 0.13 | Russia | 0.15 | Sochi | SOC | 0.15 |
Eastern Europe | 0.13 | Serbia | 0.16 | Novi Sad | SER | 0.16 |
Eastern Europe | 0.13 | Turkey | 0.12 | Aliaga | TUA | 0.12 |
Eastern Europe | 0.13 | Turkey | 0.12 | Hopa | TUH | 0.11 |
Southern Europe | 0.13 | Albania | 0.13 | Durres | ALD | 0.13 |
Southern Europe | 0.13 | Croatia | 0.14 | Dubrovnik | CRO | 0.14 |
Southern Europe | 0.13 | Greece | 0.13 | Athens | GRA | 0.12 |
Southern Europe | 0.13 | Greece | 0.13 | Chania | GRC | 0.14 |
Southern Europe | 0.13 | Italy | 0.12 | Bologna | ITB | 0.11 |
Southern Europe | 0.13 | Italy | 0.12 | Puglia | ITP | 0.13 |
Southern Europe | 0.13 | Italy | 0.12 | Rome | ITR | |
Southern Europe | 0.13 | Malta | 0.10 | Luqa | MAL | 0.10 |
Southern Europe | 0.13 | Portugal | 0.13 | Penafiel | POP | 0.13 |
Southern Europe | 0.13 | Slovenia | 0.09 | Ajdovscina | SLO | 0.09 |
Southern Europe | 0.13 | Spain | 0.15 | Badajoz | SPB | 0.14 |
Southern Europe | 0.13 | Spain | 0.15 | Catarroja | SPC | 0.15 |
Southern Europe | 0.13 | Spain | 0.15 | Cornella del Llobregat | BAR | |
Southern Europe | 0.13 | Spain | 0.15 | Magaluf | SPM | 0.14 |
Southern Europe | 0.13 | Spain | 0.15 | San Roque | SPS | 0.16 |
Western Europe | 0.12 | France | 0.12 | Saint-Martin-d'Heres | FRS | 0.14 |
Western Europe | 0.12 | France | 0.12 | Strasbourg | STS | 0.10 |
# Initialize Word document
doc <-
read_docx() |>
body_add_flextable(value = flex_table)
# Define the output path with 'here' library
output_path <- here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_Europe_microsats_table.docx"
)
# Save the Word document
print(doc, target = output_path)
To make scatter plot
# Group by Country and calculate the mean for mean_fst_by_country
aggregated_data <- fst4 |>
dplyr::group_by(Country) |>
dplyr::summarise(mean_fst = mean(mean_fst_by_country, na.rm = TRUE))
# Order the aggregated data
aggregated_data <- aggregated_data[order(aggregated_data$mean_fst), ]
# Assign a numeric index for plotting
aggregated_data$index <- 1:nrow(aggregated_data)
# Fit a linear model
lm_fit <- lm(mean_fst ~ index, data = aggregated_data)
# Predicted values from the linear model
aggregated_data$fitted_values <- predict(lm_fit)
ggplot(aggregated_data, aes(x = index, y = mean_fst)) +
geom_point(aes(color = Country), size = 3) +
geom_line(aes(y = fitted_values), color = "blue") + # Fitted line
labs(
title = "Mean Fst by Country",
x = "Ordered Countries",
y = "Mean Fst Value"
) +
scale_x_continuous(breaks = aggregated_data$index, labels = aggregated_data$Country) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(legend.position = "none")
Save it
Estimate distances
library(geosphere)
# Grab the population names from the matrix aa
populations_with_fst <- colnames(all_fst_matrix)
# Subset the sampling_loc dataframe to only include populations with FST estimates
filtered_sampling_loc <- sampling_loc %>% filter(Abbreviation %in% populations_with_fst)
# Create an empty matrix to store the distances
n <- nrow(filtered_sampling_loc)
distance_matrix <- matrix(0, n, n)
rownames(distance_matrix) <- filtered_sampling_loc$Abbreviation
colnames(distance_matrix) <- filtered_sampling_loc$Abbreviation
# Calculate the distances
for (i in 1:n) {
for (j in 1:n) {
if (i != j) {
coord1 <- c(filtered_sampling_loc$Longitude[i], filtered_sampling_loc$Latitude[i])
coord2 <- c(filtered_sampling_loc$Longitude[j], filtered_sampling_loc$Latitude[j])
distance_matrix[i, j] <- distHaversine(coord1, coord2) / 1000 # distance in km
}
}
}
# Print the distance matrix
head(distance_matrix)
## FRS STS POP SPB SPS SPC SPM
## FRS 0.0000 412.1522 1225.4383 1263.7518 1371.4448 817.6702 683.8845
## STS 412.1522 0.0000 1509.1745 1601.1573 1750.4874 1213.5639 1095.6072
## POP 1225.4383 1509.1745 0.0000 282.8409 614.5101 701.9619 939.4811
## SPB 1263.7518 1601.1573 282.8409 0.0000 331.7685 571.0509 822.8201
## SPS 1371.4448 1750.4874 614.5101 331.7685 0.0000 566.5153 787.3372
## SPC 817.6702 1213.5639 701.9619 571.0509 566.5153 0.0000 251.7726
## ITB ITP MAL SLO CRO ALD GRC GRA
## FRS 448.0871 1004.474 1269.904 628.7258 1037.845 1192.210 1877.379 1709.174
## STS 536.0256 1098.126 1522.503 546.2343 1052.803 1230.239 1974.712 1763.684
## POP 1644.8695 2102.566 2066.730 1851.8481 2197.100 2319.742 2876.039 2777.366
## SPB 1643.6694 2040.301 1923.429 1869.2123 2157.626 2262.516 2760.206 2686.566
## SPS 1690.3169 2002.327 1785.308 1929.6259 2144.409 2225.649 2642.916 2601.355
## SPC 1124.6677 1474.434 1368.139 1363.2153 1600.981 1697.515 2193.176 2115.539
## SER ROS BUL TUA TUH SOC
## FRS 1100.2591 1341.427 1392.500 1884.818 2897.030 2688.509
## STS 988.6774 1124.852 1304.105 1885.544 2745.357 2511.584
## POP 2315.5228 2566.256 2591.385 3000.557 4103.736 3906.072
## SPB 2313.3538 2594.045 2569.099 2924.845 4076.402 3894.554
## SPS 2340.8167 2651.131 2568.416 2854.982 4053.790 3891.912
## SPC 1780.6744 2085.530 2019.617 2354.477 3518.971 3346.038
saveRDS(distance_matrix, here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/distance_matrix.rds"))
Load and check the Fst values
distance_matrix <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/distance_matrix.rds"))
LD2_df <- read_csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/LD2_df_01_overlap.csv")
## New names:
## Rows: 24 Columns: 25
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (23): SOC, GES, STS, ITR, GRC, BAR, BUL, CRO, GRA, ITB, MAL, SPM,
## TUA, T... lgl (1): SPS
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
## # A tibble: 6 × 25
## ...1 SOC GES STS ITR GRC BAR BUL CRO GRA ITB
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SOC NA NA NA NA NA NA NA NA NA NA
## 2 GES 0.0113 NA NA NA NA NA NA NA NA NA
## 3 STS 0.106 0.109 NA NA NA NA NA NA NA NA
## 4 ITR 0.0955 0.101 0.0398 NA NA NA NA NA NA NA
## 5 GRC 0.117 0.122 0.0743 0.0571 NA NA NA NA NA NA
## 6 BAR 0.136 0.143 0.0927 0.0655 0.101 NA NA NA NA NA
## # ℹ 14 more variables: MAL <dbl>, SPM <dbl>, TUA <dbl>, TUH <dbl>, ALD <dbl>,
## # FRS <dbl>, ITP <dbl>, POP <dbl>, ROS <dbl>, SER <dbl>, SLO <dbl>,
## # SPC <dbl>, SPB <dbl>, SPS <lgl>
Convert the data into a matrix.
## SOC GES STS ITR GRC BAR
## SOC NA 0.01126978 0.10591967 0.09551819 0.11671692 0.13602044
## GES 0.01126978 NA 0.10927914 0.10057375 0.12208159 0.14270495
## STS 0.10591967 0.10927914 NA 0.03982659 0.07434949 0.09268239
## ITR 0.09551819 0.10057375 0.03982659 NA 0.05710568 0.06549046
## GRC 0.11671692 0.12208159 0.07434949 0.05710568 NA 0.10094517
## BAR 0.13602044 0.14270495 0.09268239 0.06549046 0.10094517 NA
## BUL CRO GRA ITB MAL SPM
## SOC 0.12832355 0.11495017 0.10889177 0.13371682 0.09458982 0.12205750
## GES 0.13384104 0.12036716 0.11376844 0.13892193 0.10125102 0.12971049
## STS 0.07799253 0.06354859 0.07077473 0.07120783 0.03980952 0.06532393
## ITR 0.05392586 0.04949314 0.04798060 0.06838917 0.01702597 0.06977034
## GRC 0.09338818 0.05248230 0.02417956 0.09782363 0.05730552 0.09532555
## BAR 0.10885970 0.09393694 0.09225317 0.12946121 0.07823715 0.11048101
## TUA TUH ALD FRS ITP POP
## SOC 0.11826743 0.10435177 0.13932531 0.10279814 0.10070244 0.09884633
## GES 0.12401729 0.11137798 0.14452571 0.10779885 0.10598420 0.10461590
## STS 0.07272436 0.05322703 0.09733023 0.03009784 0.04109490 0.04320132
## ITR 0.03907520 0.03526920 0.08060398 0.03984728 0.02050277 0.03645268
## GRC 0.07973727 0.07020895 0.07388877 0.07086343 0.06244349 0.06910929
## BAR 0.06317696 0.04376254 0.12083140 0.08889532 0.08100898 0.08718046
## ROS SER SLO SPC SPB SPS
## SOC 0.13423764 0.2267217 0.09140753 0.12576249 0.11561471 0.15080720
## GES 0.13912010 0.2329000 0.09665973 0.13312607 0.12082033 0.15814054
## STS 0.07530389 0.1752962 0.04446887 0.06818588 0.06120598 0.09859755
## ITR 0.07758959 0.1595922 0.04133080 0.08683449 0.07820355 0.07085827
## GRC 0.10336915 0.1893113 0.05999989 0.10519477 0.09488816 0.10597149
## BAR 0.12469756 0.2177834 0.07855252 0.12926803 0.12167900 0.11394015
Order
## [1] "FRS" "STS" "POP" "SPB" "SPS" "SPC" "SPM" "BAR" "ITB" "ITR" "ITP" "MAL"
## [13] "SLO" "CRO" "ALD" "GRC" "GRA" "SER" "ROS" "BUL" "TUA" "TUH" "SOC" "GES"
Create vector with order of populations
# Extract the populations that appear in LD2_df
populations_in_LD2 <- colnames(LD2_df)
# Reorder the populations based on order_pops
poporder <- populations_in_LD2[populations_in_LD2 %in% order_pops]
#LD2_df[match(poporder, LD2_df$Abbreviation),] #this also doesn't reorder it
# Print the reordered populations
print(poporder)
## [1] "SOC" "GES" "STS" "ITR" "GRC" "BAR" "BUL" "CRO" "GRA" "ITB" "MAL" "SPM"
## [13] "TUA" "TUH" "ALD" "FRS" "ITP" "POP" "ROS" "SER" "SLO" "SPC" "SPB" "SPS"
Lets check if the matrix is symmetric.
## [1] TRUE
Order the matrix using poporder. We will also add NA on the upper left side of the matrix.
Now we have to convert the matrix to a data frame to plot it with ggplot.
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by the
## caller; using TRUE
## X1 X2 value
## FRS : 24 FRS : 24 Min. :0.00626
## STS : 24 STS : 24 1st Qu.:0.06090
## POP : 24 POP : 24 Median :0.08701
## SPB : 24 SPB : 24 Mean :0.09085
## SPS : 24 SPS : 24 3rd Qu.:0.10948
## SPC : 24 SPC : 24 Max. :0.23859
## (Other):432 (Other):432 NA's :300
Compare distance and FST
# Fill lower triangle of 'aa' matrix
aa[lower.tri(aa)] <- t(aa)[lower.tri(aa)]
# Fill diagonal with 0 (or another value that makes sense in your context)
diag(aa) <- 0
# Combine 'aa' and 'distance_matrix'
data <- data.frame(Distance = as.vector(distance_matrix), FST = as.vector(aa))
#saveRDS(data, here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/data_aa.rds"))
#data <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/data_aa.rds"))
# Add row and column indices for easier tracking
data$row_index <- rep(rownames(distance_matrix), each = ncol(distance_matrix))
data$col_index <- rep(colnames(distance_matrix), nrow(distance_matrix))
data <- data |>
dplyr::arrange(
Distance
)
head(data)
## Distance FST row_index col_index
## 1 0 0 FRS FRS
## 2 0 0 STS STS
## 3 0 0 POP POP
## 4 0 0 SPB SPB
## 5 0 0 SPS SPS
## 6 0 0 SPC SPC
Fit linear regression
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_by_distance_microsats_overlap_with_equ.pdf"),
width = 6,
height = 4,
units = "in"
)
Without equation
data <- data[data$Distance > 0, ]
# Fit linear model
lm_model <- lm(FST/(1-FST) ~ log(Distance), data = data)
equation_text <- sprintf("y = %.6fx + %.3f", coef(lm_model)[2], coef(lm_model)[1])
r2_text <- sprintf("R^2 = %.2f", summary(lm_model)$r.squared)
# source the plotting function
source(here("analyses", "my_theme2.R"))
# Plot
ggplot(data, aes(x = log(Distance), y = FST/(1-FST))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
# annotate("text", x = max(log((data$Distance))) * 0.85, y = max(data$FST/(1-data$FST)) * 0.95, label = paste(equation_text, #r2_text, sep = "\n"), size = 4, color = "black") +
labs(title = "FST vs Distance - All populations",
x = "Log(Distance)",
y = "FST(1-FST)") +
scale_x_continuous(labels = scales::comma) +
theme_classic() +
theme(axis.text.x = element_text(size = 14), # Increase font size for x-axis
axis.text.y = element_text(size = 14 )) # Increase font size for y-axi
## `geom_smooth()` using formula = 'y ~ x'
ggsave(
filename = here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/fst_by_distance_microsats_overlap.pdf"),
width = 6,
height = 4,
units = "in"
)
We can merge the FST and distance matrices
# Ensure the matrices have the same names in the same order
common_names <- intersect(rownames(distance_matrix), rownames(aa))
sorted_names <- sort(common_names)
# Reorder the matrices
distance_matrix <- distance_matrix[sorted_names, sorted_names]
aa <- aa[sorted_names, sorted_names]
# Initialize the final merged matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names
colnames(merged_matrix) <- sorted_names
# Fill the upper triangular part from aa
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
# Fill the lower triangular part from distance_matrix
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Format the matrix (Fst two decimals and distance in Km with zero decimals)
# Format the elements based on their position in the matrix
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
# Upper triangular - Fst values with two decimal places
merged_matrix[i, j] <- sprintf("%.2f", as.numeric(merged_matrix[i, j]))
} else if (i > j) {
# Lower triangular - Distance values with zero decimal places
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Now the merged_matrix should be formatted as you need
print(merged_matrix)
## ALD BAR BUL CRO FRS GES GRA GRC ITB ITP
## ALD NA "0.12" "0.11" "0.05" "0.09" "0.14" "0.08" "0.07" "0.12" "0.08"
## BAR "1453" NA "0.11" "0.09" "0.09" "0.14" "0.09" "0.10" "0.13" "0.08"
## BUL "414" "1749" NA "0.09" "0.07" "0.13" "0.09" "0.09" "0.10" "0.06"
## CRO "180" "1340" "428" NA "0.06" "0.12" "0.06" "0.05" "0.09" "0.05"
## FRS "1192" "518" "1393" "1038" NA "0.11" "0.07" "0.07" "0.06" "0.04"
## GES "1770" "3175" "1426" "1845" "2794" NA "0.11" "0.12" "0.14" "0.11"
## GRA "534" "1906" "656" "711" "1709" "1540" NA "0.02" "0.10" "0.05"
## GRC "754" "2012" "925" "934" "1877" "1675" "270" NA "0.10" "0.06"
## ITB "752" "832" "950" "591" "448" "2365" "1279" "1467" NA "0.07"
## ITP "224" "1233" "604" "201" "1004" "1991" "705" "884" "583" NA
## ITR "585" "869" "899" "477" "654" "2321" "1070" "1224" "302" "371"
## MAL "746" "1237" "1157" "817" "1270" "2395" "873" "862" "996" "621"
## POP "2320" "872" "2591" "2197" "1225" "4010" "2777" "2876" "1645" "2103"
## ROS "771" "1791" "445" "683" "1341" "1496" "1100" "1370" "961" "884"
## SER "442" "1496" "316" "322" "1100" "1695" "884" "1141" "672" "520"
## SLO "689" "1066" "782" "509" "629" "2166" "1220" "1440" "245" "585"
## SOC "1678" "3075" "1327" "1748" "2689" "109" "1470" "1618" "2262" "1898"
## SPB "2263" "820" "2569" "2158" "1264" "3995" "2687" "2760" "1644" "2040"
## SPC "1698" "303" "2020" "1601" "818" "3445" "2116" "2193" "1125" "1474"
## SPM "1450" "209" "1782" "1360" "684" "3204" "1864" "1944" "916" "1227"
## SPS "2226" "867" "2568" "2144" "1371" "3988" "2601" "2643" "1690" "2002"
## STS "1230" "922" "1304" "1053" "412" "2620" "1764" "1975" "536" "1098"
## TUA "694" "2129" "641" "851" "1885" "1265" "277" "445" "1441" "901"
## TUH "1828" "3258" "1512" "1919" "2897" "194" "1544" "1650" "2461" "2051"
## ITR MAL POP ROS SER SLO SOC SPB SPC SPM
## ALD "0.08" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BAR "0.07" "0.08" "0.09" "0.12" "0.22" "0.08" "0.14" "0.12" "0.13" "0.11"
## BUL "0.05" "0.06" "0.07" "0.10" "0.20" "0.07" "0.13" "0.10" "0.10" "0.10"
## CRO "0.05" "0.06" "0.06" "0.10" "0.17" "0.06" "0.11" "0.09" "0.10" "0.09"
## FRS "0.04" "0.04" "0.04" "0.07" "0.16" "0.04" "0.10" "0.06" "0.07" "0.06"
## GES "0.10" "0.10" "0.10" "0.14" "0.23" "0.10" "0.01" "0.12" "0.13" "0.13"
## GRA "0.05" "0.05" "0.07" "0.10" "0.19" "0.05" "0.11" "0.09" "0.10" "0.09"
## GRC "0.06" "0.06" "0.07" "0.10" "0.19" "0.06" "0.12" "0.09" "0.11" "0.10"
## ITB "0.07" "0.06" "0.05" "0.10" "0.22" "0.06" "0.13" "0.08" "0.09" "0.09"
## ITP "0.02" "0.02" "0.04" "0.08" "0.17" "0.04" "0.10" "0.07" "0.08" "0.06"
## ITR NA "0.02" "0.04" "0.08" "0.16" "0.04" "0.10" "0.08" "0.09" "0.07"
## MAL "694" NA "0.03" "0.07" "0.16" "0.04" "0.09" "0.07" "0.07" "0.06"
## POP "1734" "2067" NA "0.07" "0.17" "0.04" "0.10" "0.05" "0.05" "0.05"
## ROS "1047" "1498" "2566" NA "0.20" "0.06" "0.13" "0.09" "0.10" "0.09"
## SER "697" "1139" "2316" "367" NA "0.15" "0.23" "0.20" "0.22" "0.21"
## SLO "455" "1118" "1852" "726" "476" NA "0.09" "0.05" "0.06" "0.05"
## SOC "2224" "2316" "3906" "1388" "1591" "2060" NA "0.12" "0.13" "0.12"
## SPB "1683" "1923" "283" "2594" "2313" "1869" "3895" NA "0.01" "0.01"
## SPC "1124" "1368" "702" "2086" "1781" "1363" "3346" "571" NA "0.03"
## SPM "883" "1128" "939" "1873" "1555" "1160" "3107" "823" "252" NA
## SPS "1669" "1785" "615" "2651" "2341" "1930" "3892" "332" "567" "787"
## STS "835" "1523" "1509" "1125" "989" "546" "2512" "1601" "1214" "1096"
## TUA "1271" "1148" "3001" "1057" "932" "1340" "1198" "2925" "2354" "2103"
## TUH "2395" "2412" "4104" "1629" "1797" "2272" "283" "4076" "3519" "3275"
## SPS STS TUA TUH
## ALD "0.14" "0.10" "0.10" "0.09"
## BAR "0.11" "0.09" "0.06" "0.04"
## BUL "0.12" "0.08" "0.08" "0.07"
## CRO "0.11" "0.06" "0.07" "0.07"
## FRS "0.10" "0.03" "0.07" "0.05"
## GES "0.16" "0.11" "0.12" "0.11"
## GRA "0.10" "0.07" "0.07" "0.06"
## GRC "0.11" "0.07" "0.08" "0.07"
## ITB "0.13" "0.07" "0.10" "0.08"
## ITP "0.09" "0.04" "0.06" "0.04"
## ITR "0.07" "0.04" "0.04" "0.04"
## MAL "0.07" "0.04" "0.05" "0.04"
## POP "0.09" "0.04" "0.06" "0.05"
## ROS "0.13" "0.08" "0.10" "0.08"
## SER "0.24" "0.18" "0.20" "0.17"
## SLO "0.09" "0.04" "0.06" "0.04"
## SOC "0.15" "0.11" "0.12" "0.10"
## SPB "0.13" "0.06" "0.10" "0.08"
## SPC "0.14" "0.07" "0.11" "0.08"
## SPM "0.08" "0.07" "0.09" "0.07"
## SPS NA "0.10" "0.03" "0.09"
## STS "1750" NA "0.07" "0.05"
## TUA "2855" "1886" NA "0.05"
## TUH "4054" "2745" "1267" NA
cities <- readRDS(here("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/sampling_loc_euro_microsats_overlap.rds"))
cities <- as_tibble(cities)
head(cities)
## # A tibble: 6 × 7
## Pop_City Country Latitude Longitude Abbreviation Region order_microsat
## <chr> <chr> <dbl> <dbl> <chr> <chr> <int>
## 1 Lom Bulgar… 43.8 23.2 BUL Easte… 75
## 2 Sakhumi, Abkhaz… Georgia 43.1 40.9 GES Easte… 94
## 3 Satu Mare Romania 47.8 22.9 ROS Easte… 71
## 4 Sochi Russia 43.6 39.7 SOC Easte… 92
## 5 Novi Sad Serbia 45.3 19.8 SER Easte… 68
## 6 Aliaga Turkey 38.8 26.9 TUA Easte… 76
We can sort by distance
# Calculate row-wise mean distances (excluding diagonal)
row_means <- rowMeans(distance_matrix, na.rm=TRUE)
# Sort row names by mean distances
sorted_names_by_distance <- names(sort(row_means))
# Reorder distance_matrix and aa matrices based on these sorted names
distance_matrix <- distance_matrix[sorted_names_by_distance, sorted_names_by_distance]
aa <- aa[sorted_names_by_distance, sorted_names_by_distance]
# Your existing code to initialize and fill the merged_matrix
merged_matrix <- matrix(NA, nrow = nrow(aa), ncol = ncol(aa))
rownames(merged_matrix) <- sorted_names_by_distance
colnames(merged_matrix) <- sorted_names_by_distance
merged_matrix[upper.tri(merged_matrix, diag = FALSE)] <- aa[upper.tri(aa, diag = FALSE)]
merged_matrix[lower.tri(merged_matrix, diag = FALSE)] <- distance_matrix[lower.tri(distance_matrix, diag = FALSE)]
# Formatting code with absolute value for upper triangular part
for(i in 1:nrow(merged_matrix)) {
for(j in 1:ncol(merged_matrix)) {
if (i < j) {
merged_matrix[i, j] <- sprintf("%.2f", abs(as.numeric(merged_matrix[i, j])))
} else if (i > j) {
merged_matrix[i, j] <- sprintf("%.0f", as.numeric(merged_matrix[i, j]))
}
}
}
# Print the merged matrix
print(merged_matrix)
## CRO ITP ITR ALD ITB SLO SER BUL FRS MAL
## CRO NA "0.05" "0.05" "0.05" "0.09" "0.06" "0.17" "0.09" "0.06" "0.06"
## ITP "201" NA "0.02" "0.08" "0.07" "0.04" "0.17" "0.06" "0.04" "0.02"
## ITR "477" "371" NA "0.08" "0.07" "0.04" "0.16" "0.05" "0.04" "0.02"
## ALD "180" "224" "585" NA "0.12" "0.08" "0.22" "0.11" "0.09" "0.08"
## ITB "591" "583" "302" "752" NA "0.06" "0.22" "0.10" "0.06" "0.06"
## SLO "509" "585" "455" "689" "245" NA "0.15" "0.07" "0.04" "0.04"
## SER "322" "520" "697" "442" "672" "476" NA "0.20" "0.16" "0.16"
## BUL "428" "604" "899" "414" "950" "782" "316" NA "0.07" "0.06"
## FRS "1038" "1004" "654" "1192" "448" "629" "1100" "1393" NA "0.04"
## MAL "817" "621" "694" "746" "996" "1118" "1139" "1157" "1270" NA
## ROS "683" "884" "1047" "771" "961" "726" "367" "445" "1341" "1498"
## GRA "711" "705" "1070" "534" "1279" "1220" "884" "656" "1709" "873"
## STS "1053" "1098" "835" "1230" "536" "546" "989" "1304" "412" "1523"
## BAR "1340" "1233" "869" "1453" "832" "1066" "1496" "1749" "518" "1237"
## SPM "1360" "1227" "883" "1450" "916" "1160" "1555" "1782" "684" "1128"
## TUA "851" "901" "1271" "694" "1441" "1340" "932" "641" "1885" "1148"
## GRC "934" "884" "1224" "754" "1467" "1440" "1141" "925" "1877" "862"
## SPC "1601" "1474" "1124" "1698" "1125" "1363" "1781" "2020" "818" "1368"
## SPB "2158" "2040" "1683" "2263" "1644" "1869" "2313" "2569" "1264" "1923"
## SPS "2144" "2002" "1669" "2226" "1690" "1930" "2341" "2568" "1371" "1785"
## POP "2197" "2103" "1734" "2320" "1645" "1852" "2316" "2591" "1225" "2067"
## SOC "1748" "1898" "2224" "1678" "2262" "2060" "1591" "1327" "2689" "2316"
## GES "1845" "1991" "2321" "1770" "2365" "2166" "1695" "1426" "2794" "2395"
## TUH "1919" "2051" "2395" "1828" "2461" "2272" "1797" "1512" "2897" "2412"
## ROS GRA STS BAR SPM TUA GRC SPC SPB SPS
## CRO "0.10" "0.06" "0.06" "0.09" "0.09" "0.07" "0.05" "0.10" "0.09" "0.11"
## ITP "0.08" "0.05" "0.04" "0.08" "0.06" "0.06" "0.06" "0.08" "0.07" "0.09"
## ITR "0.08" "0.05" "0.04" "0.07" "0.07" "0.04" "0.06" "0.09" "0.08" "0.07"
## ALD "0.12" "0.08" "0.10" "0.12" "0.11" "0.10" "0.07" "0.13" "0.12" "0.14"
## ITB "0.10" "0.10" "0.07" "0.13" "0.09" "0.10" "0.10" "0.09" "0.08" "0.13"
## SLO "0.06" "0.05" "0.04" "0.08" "0.05" "0.06" "0.06" "0.06" "0.05" "0.09"
## SER "0.20" "0.19" "0.18" "0.22" "0.21" "0.20" "0.19" "0.22" "0.20" "0.24"
## BUL "0.10" "0.09" "0.08" "0.11" "0.10" "0.08" "0.09" "0.10" "0.10" "0.12"
## FRS "0.07" "0.07" "0.03" "0.09" "0.06" "0.07" "0.07" "0.07" "0.06" "0.10"
## MAL "0.07" "0.05" "0.04" "0.08" "0.06" "0.05" "0.06" "0.07" "0.07" "0.07"
## ROS NA "0.10" "0.08" "0.12" "0.09" "0.10" "0.10" "0.10" "0.09" "0.13"
## GRA "1100" NA "0.07" "0.09" "0.09" "0.07" "0.02" "0.10" "0.09" "0.10"
## STS "1125" "1764" NA "0.09" "0.07" "0.07" "0.07" "0.07" "0.06" "0.10"
## BAR "1791" "1906" "922" NA "0.11" "0.06" "0.10" "0.13" "0.12" "0.11"
## SPM "1873" "1864" "1096" "209" NA "0.09" "0.10" "0.03" "0.01" "0.08"
## TUA "1057" "277" "1886" "2129" "2103" NA "0.08" "0.11" "0.10" "0.03"
## GRC "1370" "270" "1975" "2012" "1944" "445" NA "0.11" "0.09" "0.11"
## SPC "2086" "2116" "1214" "303" "252" "2354" "2193" NA "0.01" "0.14"
## SPB "2594" "2687" "1601" "820" "823" "2925" "2760" "571" NA "0.13"
## SPS "2651" "2601" "1750" "867" "787" "2855" "2643" "567" "332" NA
## POP "2566" "2777" "1509" "872" "939" "3001" "2876" "702" "283" "615"
## SOC "1388" "1470" "2512" "3075" "3107" "1198" "1618" "3346" "3895" "3892"
## GES "1496" "1540" "2620" "3175" "3204" "1265" "1675" "3445" "3995" "3988"
## TUH "1629" "1544" "2745" "3258" "3275" "1267" "1650" "3519" "4076" "4054"
## POP SOC GES TUH
## CRO "0.06" "0.11" "0.12" "0.07"
## ITP "0.04" "0.10" "0.11" "0.04"
## ITR "0.04" "0.10" "0.10" "0.04"
## ALD "0.09" "0.14" "0.14" "0.09"
## ITB "0.05" "0.13" "0.14" "0.08"
## SLO "0.04" "0.09" "0.10" "0.04"
## SER "0.17" "0.23" "0.23" "0.17"
## BUL "0.07" "0.13" "0.13" "0.07"
## FRS "0.04" "0.10" "0.11" "0.05"
## MAL "0.03" "0.09" "0.10" "0.04"
## ROS "0.07" "0.13" "0.14" "0.08"
## GRA "0.07" "0.11" "0.11" "0.06"
## STS "0.04" "0.11" "0.11" "0.05"
## BAR "0.09" "0.14" "0.14" "0.04"
## SPM "0.05" "0.12" "0.13" "0.07"
## TUA "0.06" "0.12" "0.12" "0.05"
## GRC "0.07" "0.12" "0.12" "0.07"
## SPC "0.05" "0.13" "0.13" "0.08"
## SPB "0.05" "0.12" "0.12" "0.08"
## SPS "0.09" "0.15" "0.16" "0.09"
## POP NA "0.10" "0.10" "0.05"
## SOC "3906" NA "0.01" "0.10"
## GES "4010" "109" NA "0.11"
## TUH "4104" "283" "194" NA
Make a table and save it as a word document
# Convert the matrix to a data frame and add a column with row names
merged_df <- as.data.frame(merged_matrix)
merged_df$Population <- rownames(merged_matrix)
# Reorder columns to have RowNames as the first column
merged_df <- merged_df[, c("Population", colnames(merged_matrix))]
merged_df1 <- as.data.frame(merged_df)
write.csv(merged_df1, "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/merged_df_microsats_overlap.csv")
# Create a flextable object from the merged_matrix
ft <- qflextable(as.data.frame(merged_df))
ft
Population | CRO | ITP | ITR | ALD | ITB | SLO | SER | BUL | FRS | MAL | ROS | GRA | STS | BAR | SPM | TUA | GRC | SPC | SPB | SPS | POP | SOC | GES | TUH |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CRO | 0.05 | 0.05 | 0.05 | 0.09 | 0.06 | 0.17 | 0.09 | 0.06 | 0.06 | 0.10 | 0.06 | 0.06 | 0.09 | 0.09 | 0.07 | 0.05 | 0.10 | 0.09 | 0.11 | 0.06 | 0.11 | 0.12 | 0.07 | |
ITP | 201 | 0.02 | 0.08 | 0.07 | 0.04 | 0.17 | 0.06 | 0.04 | 0.02 | 0.08 | 0.05 | 0.04 | 0.08 | 0.06 | 0.06 | 0.06 | 0.08 | 0.07 | 0.09 | 0.04 | 0.10 | 0.11 | 0.04 | |
ITR | 477 | 371 | 0.08 | 0.07 | 0.04 | 0.16 | 0.05 | 0.04 | 0.02 | 0.08 | 0.05 | 0.04 | 0.07 | 0.07 | 0.04 | 0.06 | 0.09 | 0.08 | 0.07 | 0.04 | 0.10 | 0.10 | 0.04 | |
ALD | 180 | 224 | 585 | 0.12 | 0.08 | 0.22 | 0.11 | 0.09 | 0.08 | 0.12 | 0.08 | 0.10 | 0.12 | 0.11 | 0.10 | 0.07 | 0.13 | 0.12 | 0.14 | 0.09 | 0.14 | 0.14 | 0.09 | |
ITB | 591 | 583 | 302 | 752 | 0.06 | 0.22 | 0.10 | 0.06 | 0.06 | 0.10 | 0.10 | 0.07 | 0.13 | 0.09 | 0.10 | 0.10 | 0.09 | 0.08 | 0.13 | 0.05 | 0.13 | 0.14 | 0.08 | |
SLO | 509 | 585 | 455 | 689 | 245 | 0.15 | 0.07 | 0.04 | 0.04 | 0.06 | 0.05 | 0.04 | 0.08 | 0.05 | 0.06 | 0.06 | 0.06 | 0.05 | 0.09 | 0.04 | 0.09 | 0.10 | 0.04 | |
SER | 322 | 520 | 697 | 442 | 672 | 476 | 0.20 | 0.16 | 0.16 | 0.20 | 0.19 | 0.18 | 0.22 | 0.21 | 0.20 | 0.19 | 0.22 | 0.20 | 0.24 | 0.17 | 0.23 | 0.23 | 0.17 | |
BUL | 428 | 604 | 899 | 414 | 950 | 782 | 316 | 0.07 | 0.06 | 0.10 | 0.09 | 0.08 | 0.11 | 0.10 | 0.08 | 0.09 | 0.10 | 0.10 | 0.12 | 0.07 | 0.13 | 0.13 | 0.07 | |
FRS | 1038 | 1004 | 654 | 1192 | 448 | 629 | 1100 | 1393 | 0.04 | 0.07 | 0.07 | 0.03 | 0.09 | 0.06 | 0.07 | 0.07 | 0.07 | 0.06 | 0.10 | 0.04 | 0.10 | 0.11 | 0.05 | |
MAL | 817 | 621 | 694 | 746 | 996 | 1118 | 1139 | 1157 | 1270 | 0.07 | 0.05 | 0.04 | 0.08 | 0.06 | 0.05 | 0.06 | 0.07 | 0.07 | 0.07 | 0.03 | 0.09 | 0.10 | 0.04 | |
ROS | 683 | 884 | 1047 | 771 | 961 | 726 | 367 | 445 | 1341 | 1498 | 0.10 | 0.08 | 0.12 | 0.09 | 0.10 | 0.10 | 0.10 | 0.09 | 0.13 | 0.07 | 0.13 | 0.14 | 0.08 | |
GRA | 711 | 705 | 1070 | 534 | 1279 | 1220 | 884 | 656 | 1709 | 873 | 1100 | 0.07 | 0.09 | 0.09 | 0.07 | 0.02 | 0.10 | 0.09 | 0.10 | 0.07 | 0.11 | 0.11 | 0.06 | |
STS | 1053 | 1098 | 835 | 1230 | 536 | 546 | 989 | 1304 | 412 | 1523 | 1125 | 1764 | 0.09 | 0.07 | 0.07 | 0.07 | 0.07 | 0.06 | 0.10 | 0.04 | 0.11 | 0.11 | 0.05 | |
BAR | 1340 | 1233 | 869 | 1453 | 832 | 1066 | 1496 | 1749 | 518 | 1237 | 1791 | 1906 | 922 | 0.11 | 0.06 | 0.10 | 0.13 | 0.12 | 0.11 | 0.09 | 0.14 | 0.14 | 0.04 | |
SPM | 1360 | 1227 | 883 | 1450 | 916 | 1160 | 1555 | 1782 | 684 | 1128 | 1873 | 1864 | 1096 | 209 | 0.09 | 0.10 | 0.03 | 0.01 | 0.08 | 0.05 | 0.12 | 0.13 | 0.07 | |
TUA | 851 | 901 | 1271 | 694 | 1441 | 1340 | 932 | 641 | 1885 | 1148 | 1057 | 277 | 1886 | 2129 | 2103 | 0.08 | 0.11 | 0.10 | 0.03 | 0.06 | 0.12 | 0.12 | 0.05 | |
GRC | 934 | 884 | 1224 | 754 | 1467 | 1440 | 1141 | 925 | 1877 | 862 | 1370 | 270 | 1975 | 2012 | 1944 | 445 | 0.11 | 0.09 | 0.11 | 0.07 | 0.12 | 0.12 | 0.07 | |
SPC | 1601 | 1474 | 1124 | 1698 | 1125 | 1363 | 1781 | 2020 | 818 | 1368 | 2086 | 2116 | 1214 | 303 | 252 | 2354 | 2193 | 0.01 | 0.14 | 0.05 | 0.13 | 0.13 | 0.08 | |
SPB | 2158 | 2040 | 1683 | 2263 | 1644 | 1869 | 2313 | 2569 | 1264 | 1923 | 2594 | 2687 | 1601 | 820 | 823 | 2925 | 2760 | 571 | 0.13 | 0.05 | 0.12 | 0.12 | 0.08 | |
SPS | 2144 | 2002 | 1669 | 2226 | 1690 | 1930 | 2341 | 2568 | 1371 | 1785 | 2651 | 2601 | 1750 | 867 | 787 | 2855 | 2643 | 567 | 332 | 0.09 | 0.15 | 0.16 | 0.09 | |
POP | 2197 | 2103 | 1734 | 2320 | 1645 | 1852 | 2316 | 2591 | 1225 | 2067 | 2566 | 2777 | 1509 | 872 | 939 | 3001 | 2876 | 702 | 283 | 615 | 0.10 | 0.10 | 0.05 | |
SOC | 1748 | 1898 | 2224 | 1678 | 2262 | 2060 | 1591 | 1327 | 2689 | 2316 | 1388 | 1470 | 2512 | 3075 | 3107 | 1198 | 1618 | 3346 | 3895 | 3892 | 3906 | 0.01 | 0.10 | |
GES | 1845 | 1991 | 2321 | 1770 | 2365 | 2166 | 1695 | 1426 | 2794 | 2395 | 1496 | 1540 | 2620 | 3175 | 3204 | 1265 | 1675 | 3445 | 3995 | 3988 | 4010 | 109 | 0.11 | |
TUH | 1919 | 2051 | 2395 | 1828 | 2461 | 2272 | 1797 | 1512 | 2897 | 2412 | 1629 | 1544 | 2745 | 3258 | 3275 | 1267 | 1650 | 3519 | 4076 | 4054 | 4104 | 283 | 194 |
fam_data <- read.csv("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/family_info_microsats.csv")
Merge
# Join with sampling_loc to get sampling localities
loc_albo <- fam_data |>
left_join(sampling_loc, by = c("FamilyID" = "Abbreviation"))
head(loc_albo)
## IndividualID FamilyID Loc1 Loc2 Loc3 Loc4 Loc5 Loc6 Loc7 Loc8
## 1 POP-PTPN01 POP 180183 209222 166166 141184 256258 196205 223229 129137
## 2 POP-PTPN02 POP 180183 209209 177177 184184 256256 205205 219225 129140
## 3 POP-PTPN03 POP 180183 209216 165166 141193 256256 205205 221223 129137
## 4 POP-PTPN04 POP 183183 209222 166177 184193 256256 205205 221225 129129
## 5 POP-PTPN05 POP 180180 209209 169177 184184 258262 205205 221221 129129
## 6 POP-PTPN06 POP 180183 204209 171177 175184 252256 205205 221221 129137
## Loc9 Loc10 Loc11 Pop_City Country Latitude Longitude Region
## 1 172175 209209 159171 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 2 175175 209209 176176 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 3 172175 206206 159161 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 4 175175 206206 161165 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 5 172175 206206 168168 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## 6 175175 206206 161161 Penafiel Portugal 41.18555 -8.329371 Southern Europe
## order_microsat
## 1 3
## 2 3
## 3 3
## 4 3
## 5 3
## 6 3
Load .gen file with microsat data from all individuals
all_pops <- read.genepop("/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen", ncode=3L)
##
## Converting data from a Genepop .gen file to a genind object...
##
##
## File description: ARBOMONITOR_Aedes_albopictus
##
## ...done.
## /// GENIND OBJECT /////////
##
## // 637 individuals; 11 loci; 163 alleles; size: 486.7 Kb
##
## // Basic content
## @tab: 637 x 163 matrix of allele counts
## @loc.n.all: number of alleles per locus (range: 6-22)
## @loc.fac: locus factor for the 163 columns of @tab
## @all.names: list of allele names for each locus
## @ploidy: ploidy of each individual (range: 2-2)
## @type: codom
## @call: read.genepop(file = "/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/for_fst_albo_microsat_overlap.gen",
## ncode = 3L)
##
## // Optional content
## @pop: population of each individual (group size range: 5-60)
## [1] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [7] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [13] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [19] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [25] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [31] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [37] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [43] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [49] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [55] POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60 POP-PTPN60
## [61] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11
## [67] SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 SPB-ESBD11 BUL-BULO34
## [73] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [79] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [85] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [91] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34
## [97] BUL-BULO34 BUL-BULO34 BUL-BULO34 BUL-BULO34 ROS-ROSM30 ROS-ROSM30
## [103] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [109] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [115] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [121] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30
## [127] ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 ROS-ROSM30 SOC-RUSO10 SOC-RUSO10
## [133] SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10 SOC-RUSO10
## [139] SOC-RUSO10 SOC-RUSO10 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [145] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [151] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [157] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [163] SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30 SER-SRNO30
## [169] SER-SRNO30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [175] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [181] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [187] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [193] TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30 TUA-TRLG30
## [199] TUA-TRLG30 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [205] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [211] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [217] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [223] TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29 TUH-TRHO29
## [229] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [235] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [241] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [247] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30
## [253] ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 ALD-ALDU30 CRO-CRPL30
## [259] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [265] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [271] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [277] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30
## [283] CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 CRO-CRPL30 GRC-GRCC25
## [289] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [295] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [301] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [307] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25
## [313] GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRC-GRCC25 GRA-GRAN08 GRA-GRAN08
## [319] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [325] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [331] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [337] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08
## [343] GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 GRA-GRAN08 ITP-ITVL09 ITP-ITVL09
## [349] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [355] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [361] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [367] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09
## [373] ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITP-ITVL09 ITR-ITRO17 ITR-ITRO17
## [379] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [385] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [391] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [397] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17
## [403] ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITR-ITRO17 ITB-ITBO15 ITB-ITBO15
## [409] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [415] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [421] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [427] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15
## [433] ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 ITB-ITBO15 MAL-MTLU39 MAL-MTLU39
## [439] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [445] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [451] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [457] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 MAL-MTLU39
## [463] MAL-MTLU39 MAL-MTLU39 MAL-MTLU39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [469] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [475] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [481] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [487] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SLO-SLGO39
## [493] SLO-SLGO39 SLO-SLGO39 SLO-SLGO39 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [499] SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10 SPS-ESUP10
## [505] SPS-ESUP10 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [511] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [517] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPC-ESVL20
## [523] SPC-ESVL20 SPC-ESVL20 SPC-ESVL20 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [529] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [535] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19
## [541] SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 SPM-ESMG19 FRS-FRMH45 FRS-FRMH45
## [547] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [553] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [559] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [565] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45
## [571] FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 FRS-FRMH45 STS-FRST30 STS-FRST30
## [577] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [583] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [589] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [595] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30
## [601] STS-FRST30 STS-FRST30 STS-FRST30 STS-FRST30 GES-ABSU05 GES-ABSU05
## [607] GES-ABSU05 GES-ABSU05 GES-ABSU05 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [613] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [619] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [625] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [631] BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28 BAR-ESBA28
## [637] BAR-ESBA28
## 24 Levels: POP-PTPN60 SPB-ESBD11 BUL-BULO34 ROS-ROSM30 ... BAR-ESBA28
Get the latitude and longitude
## [,1] [,2]
## [1,] 41.18555 -8.329371
## [2,] 41.18555 -8.329371
## [3,] 41.18555 -8.329371
## [4,] 41.18555 -8.329371
## [5,] 41.18555 -8.329371
## [6,] 41.18555 -8.329371
Add jitter
## x y
## [1,] 41.18050 -8.327255
## [2,] 41.18858 -8.333034
## [3,] 41.17833 -8.326968
## [4,] 41.17941 -8.335236
## [5,] 41.17853 -8.328774
## [6,] 41.18248 -8.330608
Add to object
Convert the data
##
## Converting data from a genind to a genpop object...
##
## ...done.
Get 1 mosquito per population, it is just to get the geographical coordinates
unique_populations <- unique(all_pops@pop)
selected_individuals <- integer(length(unique_populations))
for (i in seq_along(unique_populations)) {
inds_in_pop <- which(all_pops@pop == unique_populations[i])
selected_individuals[i] <- sample(inds_in_pop, 1)
}
all_pops_subset <- all_pops[selected_individuals, ]
Mantel test
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-2
Dgen <- dist.genpop(toto,method=2)
Dgeo <- dist(all_pops_subset$other$xy)
ibd <- mantel(
Dgen,
Dgeo,
method = "pearson",
permutations = 999,
strata = NULL,
na.rm = FALSE,
parallel = getOption("mc.cores")
)
Mantel statistic based on Pearson’s product-moment correlation
Call: mantel(xdis = Dgen, ydis = Dgeo, method = “pearson”, permutations = 999, strata = NULL, na.rm = FALSE, parallel = getOption(“mc.cores”))
Mantel statistic r: -0.008604 Significance: 0.517
Upper quantiles of permutations (null model): 90% 95% 97.5% 99% 0.148 0.191 0.231 0.276 Permutation: free Number of permutations: 999
plot(Dgeo, Dgen)
# A linear regression model (lm stands for "linear model") is fitted, with the genetic distances (Dgen) as the response variable and the geographic distances (Dgeo) as the predictor. The distances are transformed into vectors using as.vector because the dist function produces a matrix-like structure, but the linear regression function lm requires vectors.
dist_lm <- lm(as.vector(Dgen) ~ as.vector(Dgeo))
abline(dist_lm, col="red", lty=2)
Save Plot
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/Genetic_v_Geog_distance_microsats_overlap.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
Save Plot without equation
# Plot it
# Start the PDF device
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/Genetic_v_Geog_distance_microsats_overlap_noequ.pdf"))
plot(Dgeo, Dgen, main = "Genetic Distance vs Geographic Distance")
abline(dist_lm, col="red", lty=2)
# Extracting the coefficients from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Generating the equation string
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.85, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
dev.off()
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
#myPal <-
# colorRampPalette(c("white", "blue", "gold", "orange", "red"))
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
Save plot
library(MASS)
CairoPDF(here(
"/gpfs/gibbs/pi/caccone/mkc54/albo/europe/output/fst/overlap/microsats/IDB_PlotFromMASS_density_microsats_overlap_purple.pdf"))
dens <- kde2d(as.vector(Dgeo), as.vector(Dgen), n = 500)
#myPal <-
colorRampPalette(c("white", "blue", "gold", "orange", "red"))
myPal <-
colorRampPalette(c("white", "purple", "gold", "orange", "red"))
plot(Dgeo, Dgen, pch = 20, cex = .3, bty = "n")
image(dens, col = transp(myPal(300), .7), add = TRUE)
abline(dist_lm)
# Extracting the coefficients and R^2 from the linear model
intercept <- coef(dist_lm)[1]
slope <- coef(dist_lm)[2]
r2 <- summary(dist_lm)$r.squared
# Constructing the equation and R^2 strings
equation <- sprintf("y = %.2fx + %.2f", slope, intercept)
r2_label <- sprintf("R^2 = %.2f", r2)
# Adding the equation and R^2 to the plot
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.95, labels = equation)
#text(x = max(as.vector(Dgeo)) * 0.8, y = max(as.vector(Dgen)) * 0.90, labels = r2_label)
title("Isolation by distance")
dev.off()