This is an R Markdown document. The data for this analysis was collected on 23 January 2017. I have a pool of paired-end 100 sequences from Cedrela species and I have subsetted the data to Cedrela odorata only. These sequences were obtained via hybridization capture, targeted enrichment, and short-read sequencing on the Illumina HiSeq 3000.
Protocol: Samtools workflow found here http://www.htslib.org/workflow/#mapping_to_variant for variant calling with GATK-> .vcf files .vcf filters: –max-missing 0.85 –remove-indels –thin 10 –maf 0.05 –max-alleles 2 –min-alleles 2 –missing-indv (individuals with >15% missing data)
#load packages
library(rgdal) # used to read world map data
library(rgeos) # to fortify without needing gpclib
library(maptools)
library(ggplot2)
library(scales) # for formatting ggplot scales with commas
library(gridExtra)
library(maps)
#FSTs----
#NvsS (more or less Central American v. South America)
nvs<-read.csv("NvsS.weir.fst.rand.csv",header=1)
ggplot(nvs, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()
nvs$test<-rep("nvs", length(1000))
#NvsSvsM (divided by latitude into three equal groups)
nvsvm<-read.csv("NvsSvsM.weir.fst.rand.csv", header=1)
ggplot(nvsvm, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()
nvsvm$test<-rep("nvsvm", length(1000))
#CAvsSA Central America v South America
cavsa<-read.csv("CAvsSA.weir.fst.rand.csv",header=1)
ggplot(cavsa, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()
cavsa$test<-rep("cavsa", length(1000))
#Combine----
comp<-rbind(nvs,nvsvm,cavsa)
comp_plot<-ggplot(comp,aes(x=WEIR_AND_COCKERHAM_FST, color=test))+geom_density()+theme_bw()+
xlab("Fst")+
ylab("Density")+
theme(legend.justification=c(1,0), legend.position=c(0.9,0.7),
legend.background = element_blank())+
scale_color_discrete(name="Analysis",
breaks=c("nvs","nvsvm","cavsa"),
labels=c("North v South","North v Middle v South","Central Am. v South Am."))
#ggsave("comp_plot.jpg",comp_plot,width=6,height = 4)
#more FST tests
#CR populations within Costa Rica
cr<-read.csv("20170616_CR.weir.fst.csv",header=1)
cr.plot<-ggplot(cr, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="blue")+theme_bw()+
xlab("Fst")+
ylab("Density")
#PeruNvS Populations with in Peru
per.nvs<-read.csv("20170616_PeruNvS.weir.fst.csv",header=1)
per.nvs.plot<-ggplot(per.nvs, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="darkorange")+theme_bw()+
xlab("Fst")+
ylab("Density")
#Peru.Ec Populations in North Peru and Ecuador
per.ec<-read.csv("20170616_Peru.Ec2.weir.fst.csv",header=1)
per.ec.plot<-ggplot(per.ec, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="forestgreen")+theme_bw()+
xlab("Fst")+
ylab("Density")
#country Samples separated by country of origin
cntry<-read.csv("20170616_country.weir.fst.csv",header=1)
cntry.plot<-ggplot(cntry, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()+
xlab("Fst")+
ylab("Density")
#CAvSA different sites and samples
cavsa2<-read.csv("20170616_CAvSA.weir.fst.csv",header=1)
cavsa2.plot<-ggplot(cavsa2, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density(color="firebrick")+theme_bw()+
xlab("Fst")+
ylab("Density")
#Fst with all spp.
spp<-read.csv("20170617_spp.weir.fst.csv",header=1)
spp.plot<-ggplot(spp, aes(x=WEIR_AND_COCKERHAM_FST))+geom_density()+theme_bw()+
xlab("Fst")+
ylab("Density")+ xlim(0,1.1)
#fst/maf
#maf = minor allele frequency
#CAvSA
maf.cavsa<-read.csv("20170619_CAvSA.frq.fst.csv", header=1)
maf.cavsa.plot<-ggplot(maf.cavsa, aes(x=fst, y=minor.frq))+geom_point(color="firebrick")+
theme_bw()+
xlab("Fst")+
ylab("Minor Allele Frequency")
#Peru.Ec
maf.peru.ec<-read.csv("20170619_Peru.Ec2.frq.fst.csv",header=1)
maf.peru.ec.plot<-ggplot(maf.peru.ec, aes(x=fst, y=minor.frq))+geom_point(color="forestgreen")+
theme_bw()+
xlab("Fst")+
ylab("Minor Allele Frequency")
#Peru NvS
maf.peru.nvs<-read.csv("20170619_PeruNvS.frq.fst.csv",header=1)
maf.peru.nvs.plot<-ggplot(maf.peru.nvs, aes(x=fst, y=minor.frq))+geom_point(color="darkorange")+
theme_bw()+
xlab("Fst")+
ylab("Minor Allele Frequency")
#All spp.
maf.spp<-read.csv("20170619_spp.frq.fst.csv",header=1)
maf.spp.plot<-ggplot(maf.spp, aes(x=fst, y=minor.frq))+geom_point(color="turquoise")+
theme_bw()+
xlab("Fst")+
ylab("Minor Allele Frequency")
#Country
maf.cntry<-read.csv("20170619_country.frq.fst.csv",header=1)
maf.cntry.plot<-ggplot(maf.cntry, aes(x=fst, y=minor.frq))+geom_point()+
theme_bw()+
xlab("Fst")+
ylab("Minor Allele Frequency")
#Costa Rica
maf.CR<-read.csv("20170619_CR.frq.fst.csv",header=1)
maf.CR.plot<-ggplot(maf.CR, aes(x=fst, y=minor.frq))+geom_point(color="blue")+
theme_bw()+
xlab("Fst")+
ylab("Minor Allele Frequency")
# Data from http://thematicmapping.org/downloads/world_borders.php.
# Direct link: http://thematicmapping.org/downloads/TM_WORLD_BORDERS_SIMPL-0.3.zip
# Unpack and put the files in a dir 'data'
worldMap <- readOGR(dsn="TM_WORLD_BORDERS-0.3.shp", layer="TM_WORLD_BORDERS-0.3")
## OGR data source with driver: ESRI Shapefile
## Source: "TM_WORLD_BORDERS-0.3.shp", layer: "TM_WORLD_BORDERS-0.3"
## with 246 features
## It has 11 fields
## Integer64 fields read as strings: POP2005
# Change "data" to your path in the above!
worldMap.fort <- fortify(worldMap, region = "ISO3")
# Fortifying a map makes the data frame ggplot uses to draw the map outlines.
# "region" or "id" identifies those polygons, and links them to your data.
# Look at head(worldMap@data) to see other choices for id.
# Your data frame needs a column with matching ids to set as the map_id aesthetic in ggplot.
idList <- worldMap@data$ISO3
# "coordinates" extracts centroids of the polygons, in the order listed at worldMap@data
centroids.df <- as.data.frame(coordinates(worldMap))
names(centroids.df) <- c("Longitude", "Latitude") #more sensible column names
# This shapefile contained population data, let's plot it.
popList <- worldMap@data$POP2005
pop.df <- data.frame(id = idList, centroids.df)
Labs<-read.csv("Labels.csv")
indvs<-read.csv("20170615_mapping.csv")
names(indvs)[2]<-"LAT"
names(indvs)[1]<-"LONG"
all.spp<-read.csv("20170617_mapping.csv",header=1)
names(all.spp)[11]<-"LAT"
names(all.spp)[12]<-"LONG"
cr_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object
geom_map(fill="white",colour= "black", map = worldMap.fort) +
expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
coord_equal(xlim = c(-86,-83), ylim = c(8, 11)) + #let's view South America
labs(x = "Longitude", y = "Latitude", title = "Costa Rica") +
theme_bw()+
geom_point(aes(map_id=NA,x = LONG, y = LAT, color=CR),
size = 2, data = indvs)+
scale_color_manual(values=c("turquoise", "deepskyblue", "blue", "black"))+
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
per.nvs_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object
geom_map(fill="white",colour= "black", map = worldMap.fort) +
expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
coord_equal(xlim = c(-80,-65), ylim = c(-20,0)) + #let's view South America
labs(x = "Longitude", y = "Latitude", title = "Peru") +
theme_bw()+
geom_point(aes(map_id=NA,x = LONG, y = LAT, color=Peru.NvS),
size = 2, data = indvs)+
scale_color_manual(values=c("darkorange", "orangered", "black"))+
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
per.ec_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object
geom_map(fill="white",colour= "black", map = worldMap.fort) +
expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
coord_equal(xlim = c(-81,-70), ylim = c(-7,1)) + #let's view South America
labs(x = "Longitude", y = "Latitude", title = "Ecuador and Peru") +
theme_bw()+
geom_point(aes(map_id=NA,x = LONG, y = LAT, color=NEcvP),
size = 2, data = indvs)+
scale_color_manual(values=c("olivedrab3", "forestgreen","black"))+
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
cntry_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object
geom_map(fill="white",colour= "black", map = worldMap.fort) +
expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
coord_equal(xlim = c(-90,-60), ylim = c(-25, 20)) + #let's view South America
labs(x = "Longitude", y = "Latitude", title = "Country") +
theme_bw()+
geom_point(aes(map_id=NA,x = LONG, y = LAT, color=Country),
size = 2, data = indvs)+
#scale_color_manual(values=c("green", "red", "blue","black"))+
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
cavsa2_map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object
geom_map(fill="white",colour= "black", map = worldMap.fort) +
expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
coord_equal(xlim = c(-90,-60), ylim = c(-25, 20)) + #let's view South America
labs(x = "Longitude", y = "Latitude", title = "Continent") +
theme_bw()+
geom_point(aes(map_id=NA,x = LONG, y = LAT, color=CAvsSA),
size = 2, data = indvs)+
scale_color_manual(values=c("lightcoral", "firebrick"))+
theme(legend.position="none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
spp.map<-ggplot(pop.df, aes(map_id = id)) + #"id" is col in your df, not in the map object
geom_map(fill="white",colour= "black", map = worldMap.fort) +
expand_limits(x = worldMap.fort$long, y = worldMap.fort$lat) +
coord_equal(xlim = c(-90,-60), ylim = c(-25, 20)) + #let's view South America
labs(x = "Longitude", y = "Latitude", title = "Species") +
theme_bw()+
geom_point(aes(map_id=NA,x = LONG, y = LAT, color=Species),
size = 2, data = all.spp)+
scale_color_manual(values=c("#E69F00", "#56B4E9", "#009E73", "#0072B2", "#D55E00", "#CC79A7"), labels=c("C. angustifolia", "C.fissilis", "C. montana", "C. nebulosa","C. odorata", "C. saltensis"))+
#geom_label(aes(map_id=NA, x=LONG, y=LAT,label=lab), data=Labs)+
theme(legend.position="none",
legend.text = element_text(face = "italic"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Ignoring unknown aesthetics: map_id
fsts<-grid.arrange(cr.plot,per.nvs.plot,per.ec.plot,cntry.plot,cavsa2.plot,cr_map,per.nvs_map,per.ec_map,cntry_map,cavsa2_map,nrow=2)
#ggsave("20170616_fsts.jpg",fsts,width=12,height=6)
Figure 1. FST distributions and maps showing partitions. Black points were excluded.
fst.spp<-grid.arrange(spp.plot,spp.map,ncol=2)
## Warning: Removed 91 rows containing non-finite values (stat_density).
#ggsave("20170617_fst.spp.jpg",fst.spp,width=8,height=5)
Figure 2. FST distribution and map for all species groups.
fst.maf<-grid.arrange(maf.spp.plot,maf.cntry.plot,maf.cavsa.plot,maf.peru.nvs.plot,spp.map,cntry_map,cavsa2_map,per.nvs_map,nrow=2)
#ggsave("20170619_fst.maf.jpg",fst.maf,width=12,height=6)
Figure 3. Scatterplot shows Minor Allele Frequency plotted with Fst, data paired. Maps show partitions used to calculate Fst.
comp_plot
Figure 4. Combined FST distributions.