This is an R Markdown document. The data for this analysis was collected on 23 January 2017. I have a pool of paired-end 100 sequences from Cedrela species and I have subsetted the data to Cedrela odorata only. These sequences were obtained via hybridization capture, targeted enrichment, and short-read sequencing on the Illumina HiSeq 3000.

Protocol: Samtools workflow found here http://www.htslib.org/workflow/#mapping_to_variant for variant calling with GATK-> .vcf files .vcf filters: –max-missing 0.85 –remove-indels –thin 10 –maf 0.05 –max-alleles 2 –min-alleles 2 –missing-indv (individuals with >15% missing data)

#load packages
library(rgdal) # used to read world map data
library(rgeos) # to fortify without needing gpclib
library(maptools)
library(ggplot2)
library(scales) # for formatting ggplot scales with commas
library(gridExtra)
library(maps)

Data Code

#FSTs----

#NvsS (more or less Central American v. South America)
nvs<-read.csv("NvsS.weir.fst.rand.csv",header=1)
ggplot(nvs, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()

nvs$test<-rep("nvs", length(1000))

#NvsSvsM (divided by latitude into three equal groups)
nvsvm<-read.csv("NvsSvsM.weir.fst.rand.csv", header=1)
ggplot(nvsvm, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()

nvsvm$test<-rep("nvsvm", length(1000))


#CAvsSA Central America v South America
cavsa<-read.csv("CAvsSA.weir.fst.rand.csv",header=1)
ggplot(cavsa, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()

cavsa$test<-rep("cavsa", length(1000))

#Combine----
comp<-rbind(nvs,nvsvm,cavsa)
comp_plot<-ggplot(comp,aes(x=WEIR_AND_COCKERHAM_FST, color=test))+geom_density()+theme_bw()+
  xlab("Fst")+
  ylab("Density")+
  theme(legend.justification=c(1,0), legend.position=c(0.9,0.7),
        legend.background = element_blank())+
  scale_color_discrete(name="Analysis",
    breaks=c("nvs","nvsvm","cavsa"),
    labels=c("North v South","North v Middle v South","Central Am. v South Am."))

#ggsave("comp_plot.jpg",comp_plot,width=6,height = 4)

#more FST tests

#CR populations within Costa Rica
cr<-read.csv("20170616_CR.weir.fst.csv",header=1)
cr.plot<-ggplot(cr, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="blue")+theme_bw()+
  xlab("Fst")+
  ylab("Density")

#PeruNvS Populations with in Peru
per.nvs<-read.csv("20170616_PeruNvS.weir.fst.csv",header=1)
per.nvs.plot<-ggplot(per.nvs, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="darkorange")+theme_bw()+
  xlab("Fst")+
  ylab("Density")

#Peru.Ec Populations in North Peru and Ecuador
per.ec<-read.csv("20170616_Peru.Ec2.weir.fst.csv",header=1)
per.ec.plot<-ggplot(per.ec, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="forestgreen")+theme_bw()+
  xlab("Fst")+
  ylab("Density")

#country Samples separated by country of origin
cntry<-read.csv("20170616_country.weir.fst.csv",header=1)
cntry.plot<-ggplot(cntry, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()+
  xlab("Fst")+
  ylab("Density")

#CAvSA different sites and samples
cavsa2<-read.csv("20170616_CAvSA.weir.fst.csv",header=1)
cavsa2.plot<-ggplot(cavsa2, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="firebrick")+theme_bw()+
  xlab("Fst")+
  ylab("Density")

#Fst with all spp. 
spp<-read.csv("20170617_spp.weir.fst.csv",header=1)
spp.plot<-ggplot(spp, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()+
  xlab("Fst")+
  ylab("Density")+ xlim(0,1.1)

#fst/maf

#maf = minor allele frequency
#CAvSA
maf.cavsa<-read.csv("20170619_CAvSA.frq.fst.csv", header=1)
maf.cavsa.plot<-ggplot(maf.cavsa, aes(x=fst, y=minor.frq))+geom_point(color="firebrick")+
  theme_bw()+
  xlab("Fst")+
  ylab("Minor Allele Frequency")

#Peru.Ec
maf.peru.ec<-read.csv("20170619_Peru.Ec2.frq.fst.csv",header=1)
maf.peru.ec.plot<-ggplot(maf.peru.ec, aes(x=fst, y=minor.frq))+geom_point(color="forestgreen")+
  theme_bw()+
  xlab("Fst")+
  ylab("Minor Allele Frequency")

#Peru NvS
maf.peru.nvs<-read.csv("20170619_PeruNvS.frq.fst.csv",header=1)
maf.peru.nvs.plot<-ggplot(maf.peru.nvs, aes(x=fst, y=minor.frq))+geom_point(color="darkorange")+
  theme_bw()+
  xlab("Fst")+
  ylab("Minor Allele Frequency")

#All spp.
maf.spp<-read.csv("20170619_spp.frq.fst.csv",header=1)
maf.spp.plot<-ggplot(maf.spp, aes(x=fst, y=minor.frq))+geom_point(color="turquoise")+
  theme_bw()+
  xlab("Fst")+
  ylab("Minor Allele Frequency")

#Country
maf.cntry<-read.csv("20170619_country.frq.fst.csv",header=1)
maf.cntry.plot<-ggplot(maf.cntry, aes(x=fst, y=minor.frq))+geom_point()+
  theme_bw()+
  xlab("Fst")+
  ylab("Minor Allele Frequency")

#Costa Rica
maf.CR<-read.csv("20170619_CR.frq.fst.csv",header=1)
maf.CR.plot<-ggplot(maf.CR, aes(x=fst, y=minor.frq))+geom_point(color="blue")+
  theme_bw()+
  xlab("Fst")+
  ylab("Minor Allele Frequency")

Map Code

# Data from http://thematicmapping.org/downloads/world_borders.php.
# Direct link: http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip
# Unpack and put the files in a dir 'data'

worldMap <- readOGR(dsn="TM_WORLD_BORDERS-0.3.shp", layer="TM_WORLD_BORDERS-0.3")
## OGR data source with driver: ESRI Shapefile 
## Source: "TM_WORLD_BORDERS-0.3.shp", layer: "TM_WORLD_BORDERS-0.3"
## with 246 features
## It has 11 fields
## Integer64 fields read as strings:  POP2005
# Change "data" to your path in the above!
worldMap.fort <- fortify(worldMap, region = "ISO3")
# Fortifying a map makes the data frame ggplot uses to draw the map outlines.
# "region" or "id" identifies those polygons, and links them to your data. 
# Look at head(worldMap@data) to see other choices for id.
# Your data frame needs a column with matching ids to set as the map_id aesthetic in ggplot. 
idList <- worldMap@data$ISO3
# "coordinates" extracts centroids of the polygons, in the order listed at worldMap@data
centroids.df <- as.data.frame(coordinates(worldMap))
names(centroids.df) <- c("Longitude", "Latitude")  #more sensible column names
# This shapefile contained population data, let's plot it.
popList <- worldMap@data$POP2005

pop.df <- data.frame(id = idList, centroids.df)

Labs<-read.csv("Labels.csv")
indvs<-read.csv("20170615_mapping.csv")
names(indvs)[2]<-"LAT"
names(indvs)[1]<-"LONG"
all.spp<-read.csv("20170617_mapping.csv",header=1)
names(all.spp)[11]<-"LAT"
names(all.spp)[12]<-"LONG"

cr_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object 
  geom_map(fill="white",colour= "black", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  coord_equal(xlim = c(-86,-83), ylim = c(8, 11)) + #let's view South America
  labs(x = "Longitude", y = "Latitude", title = "Costa Rica") +
  theme_bw()+
  geom_point(aes(map_id=NA,x = LONG, y = LAT, color=CR), 
             size = 2, data = indvs)+
  scale_color_manual(values=c("turquoise", "deepskyblue", "blue", "black"))+
  theme(legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
per.nvs_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object 
  geom_map(fill="white",colour= "black", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  coord_equal(xlim = c(-80,-65), ylim = c(-20,0)) + #let's view South America
  labs(x = "Longitude", y = "Latitude", title = "Peru") +
  theme_bw()+
  geom_point(aes(map_id=NA,x = LONG, y = LAT, color=Peru.NvS), 
             size = 2, data = indvs)+
  scale_color_manual(values=c("darkorange", "orangered", "black"))+
  theme(legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
per.ec_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object 
  geom_map(fill="white",colour= "black", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  coord_equal(xlim = c(-81,-70), ylim = c(-7,1)) + #let's view South America
  labs(x = "Longitude", y = "Latitude", title = "Ecuador and Peru") +
  theme_bw()+
  geom_point(aes(map_id=NA,x = LONG, y = LAT, color=NEcvP), 
             size = 2, data = indvs)+
  scale_color_manual(values=c("olivedrab3", "forestgreen","black"))+
  theme(legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
cntry_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object 
  geom_map(fill="white",colour= "black", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  coord_equal(xlim = c(-90,-60), ylim = c(-25, 20)) + #let's view South America
  labs(x = "Longitude", y = "Latitude", title = "Country") +
  theme_bw()+
  geom_point(aes(map_id=NA,x = LONG, y = LAT, color=Country), 
             size = 2, data = indvs)+
  #scale_color_manual(values=c("green", "red", "blue","black"))+
  theme(legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
cavsa2_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object 
  geom_map(fill="white",colour= "black", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  coord_equal(xlim = c(-90,-60), ylim = c(-25, 20)) + #let's view South America
  labs(x = "Longitude", y = "Latitude", title = "Continent") +
  theme_bw()+
  geom_point(aes(map_id=NA,x = LONG, y = LAT, color=CAvsSA), 
             size = 2, data = indvs)+
  scale_color_manual(values=c("lightcoral", "firebrick"))+
  theme(legend.position="none",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
spp.map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object 
  geom_map(fill="white",colour= "black", map = worldMap.fort) +
  expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
  coord_equal(xlim = c(-90,-60), ylim = c(-25, 20)) + #let's view South America
  labs(x = "Longitude", y = "Latitude", title = "Species") +
  theme_bw()+
  geom_point(aes(map_id=NA,x = LONG, y = LAT, color=Species), 
             size = 2, data = all.spp)+
  scale_color_manual(values=c("#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00", "#CC79A7"), labels=c("C. angustifolia", "C.fissilis", "C. montana", "C. nebulosa","C. odorata", "C. saltensis"))+
  #geom_label(aes(map_id=NA, x=LONG, y=LAT,label=lab), data=Labs)+
  theme(legend.position="none",
        legend.text = element_text(face = "italic"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id

Plots

fsts<-grid.arrange(cr.plot,per.nvs.plot,per.ec.plot,cntry.plot,cavsa2.plot,cr_map,per.nvs_map,per.ec_map,cntry_map,cavsa2_map,nrow=2)

#ggsave("20170616_fsts.jpg",fsts,width=12,height=6)

Figure 1. FST distributions and maps showing partitions. Black points were excluded.

fst.spp<-grid.arrange(spp.plot,spp.map,ncol=2)
## Warning: Removed 91 rows containing non-finite values (stat_density).

#ggsave("20170617_fst.spp.jpg",fst.spp,width=8,height=5)

Figure 2. FST distribution and map for all species groups.

fst.maf<-grid.arrange(maf.spp.plot,maf.cntry.plot,maf.cavsa.plot,maf.peru.nvs.plot,spp.map,cntry_map,cavsa2_map,per.nvs_map,nrow=2)

#ggsave("20170619_fst.maf.jpg",fst.maf,width=12,height=6)

Figure 3. Scatterplot shows Minor Allele Frequency plotted with Fst, data paired. Maps show partitions used to calculate Fst.

comp_plot

Figure 4. Combined FST distributions.