Three population 15CO, 15CL, 15CH (hereafter refer to as H, L and O). In total 222,569 SNPs were identified from sequencing, but the set of SNPs selected for the analysis was filtered to include high confidence SNP. First the SNPs were filtered for biallelic SNP (having only two allele). It was required that every SNP was observed 40 times in each population and no more than 505 times. 505 corresponds to mean SNP depth plus one standard deviations. After all the filtering, 179,324 high confidence SNP were used for downstream analysis. Fst value were averaged over 25 SNPs sliding window.
library(ggplot2)
library(gridExtra)
setwd("C:/Users/Abiskar Gyawali/Desktop/")
fst <- read.delim("Rajan_freq_filter.txt", header = T)
fst <- na.omit(fst)
### The function below will calculate Fst. Inputs are as follows:
### pop1Freqs: Vector of allele frequencies for each locus in pop1
### pop2Freqs: Vector of allele frequencies for each locus in pop2
### pop3Freqs: Vector of allele frequencies for each locus in pop3, if there
### is a pop3. If not, leave blank.
### simple: Should the simplest formula for Fst be used? If T, the
### formula used is Fst = s^2 / (mean(p) * (1- mean(p)) .
### if false, a better formula which corrects for small number
### of populations is used: Weir and Cockerham, 1984
vectorFst <- function(pop1Freqs,pop2Freqs,pop3Freqs=NULL,simple=F){
if(simple==T){
if(is.null(pop3Freqs)){
mean <- (pop1Freqs+pop2Freqs) / 2
var <- (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2
Fst <- var / (mean*(1-mean))
}
else {
mean <- (pop1Freqs+pop2Freqs+pop3Freqs) / 3
var <- ( (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2 + (pop3Freqs-mean)^2 ) /2
Fst <- var / (mean*(1-mean))
}
}
else if(simple==F){
if(is.null(pop3Freqs)){
mean <- (pop1Freqs+pop2Freqs) / 2
var <- (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2
Fst <- var / {(mean*(1-mean)) + var/2}
}
else {
mean <- (pop1Freqs+pop2Freqs+pop3Freqs) / 3
var <- ( (pop1Freqs-mean)^2 + (pop2Freqs-mean)^2 + (pop3Freqs-mean)^2 ) /2
Fst <- var / { (mean*(1-mean)) +var / 3}
}
}
return(Fst)
}
#### Run Fst
fst$HL <- vectorFst(fst$CH_freq, fst$CL_freq, simple = F)
fst$HO <- vectorFst(fst$CH_freq, fst$CO_freq, simple = F)
fst$LO <- vectorFst(fst$CL_freq, fst$CO_freq, simple = F)
fst <- na.omit(fst)
#Set window size. Note: Full window size = (2 * half_window) + 1
half_window <- 12
# Do this for each combination of population
##HL
window_center <- seq(half_window + 1,length(fst$HL) - half_window)
#####define the variable
Sliding_HL <- numeric(174844) ####number of indices
for (i in window_center){
# Do shit
Sliding_HL[i] <- mean(fst$HL[(i- half_window):(i+half_window)])
}
##HO
window_center <- seq(half_window + 1,length(fst$HO) - half_window)
#####define the variable
Sliding_HO <- numeric(174844) ####number of indices
for (i in window_center){
# Do shit
Sliding_HO[i] <- mean(fst$HO[(i- half_window):(i+half_window)])
}
##LO
window_center <- seq(half_window + 1,length(fst$LO) - half_window)
#####define the variable
Sliding_LO <- numeric(174844) ####number of indices
for (i in window_center){
# Do shit
Sliding_LO[i] <- mean(fst$LO[(i- half_window):(i+half_window)])
}
fst <- cbind(fst, Sliding_HL, Sliding_HO, Sliding_LO)
Part 1: Loading the fst as per the chromosome
sliding_chr1 <- newdata[which(newdata$Chromosome == '1'),]
sliding_chr2 <- newdata[which(newdata$Chromosome == '2'),]
sliding_chr3 <- newdata[which(newdata$Chromosome == '3'),]
sliding_chr4 <- newdata[which(newdata$Chromosome == '4'),]
sliding_chr5 <- newdata[which(newdata$Chromosome == '5'),]
sliding_chr6 <- newdata[which(newdata$Chromosome == '6'),]
sliding_chr7 <- newdata[which(newdata$Chromosome == '7'),]
sliding_chr8 <- newdata[which(newdata$Chromosome == '8'),]
sliding_chr9 <- newdata[which(newdata$Chromosome == '9'),]
sliding_chr10 <- newdata[which(newdata$Chromosome == '10'),]