OutFLANK analysis with all SNPs
FstDataFrame <- MakeDiploidFSTMat(SNPdata,locinames,cic_met$fishing_pressure)
## Calculating FSTs, may take a few minutes...
## [1] "10000 done of 54869"
## [1] "20000 done of 54869"
## [1] "30000 done of 54869"
## [1] "40000 done of 54869"
## [1] "50000 done of 54869"
FstDataFrame$PlotPos <- 1:nrow(FstDataFrame)
FstDataFrame$Chrom<- cic_rds$chromosome_int
## FST Correlation
plot(FstDataFrame$FST, FstDataFrame$FSTNoCorr,
xlim=c(-0.01,0.3), ylim=c(-0.01,0.3),
pch=20)
abline(0,1)

k <- 3 ## Number of pops
out_ini <- OutFLANK(FstDataFrame, NumberOfSamples=k) ## Run outflank on FST dataframe
# Plot results to compare chi-squared distribution vs. actual FST distribution
OutFLANKResultsPlotter(out_ini, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
FALSE, RightZoomFraction = 0.05, titletext = NULL)

## Poor fit, particularly on right tail
OutFLANKResultsPlotter(out_ini, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
TRUE, RightZoomFraction = 0.15, titletext = NULL)

# Histogram of P-values weird
hist(out_ini$results$pvaluesRightTail,breaks = 20)

#### LD Pruning ####
G<-add_code256(big_copy(t(cic_rds$G),type="raw"),code=bigsnpr:::CODE_012)
## Warning: Assignment will down cast from double to raw.
## Hint: To remove this warning, use options(bigstatsr.typecast.warning = FALSE).
newpc<-snp_autoSVD(G=G,infos.chr =cic_rds$chromosome_int,infos.pos = cic_rds$position)
## Phase of clumping at r2 > 0.2.. keep 10811 SNPs.
##
## Iteration 1:
## Computing SVD..
## 10 long-range LD regions were detected..
##
## Iteration 2:
## Computing SVD..
##
## Converged!
plot(newpc) # 3 k groups

which_pruned <- attr(newpc, which="subset") # Indexes of remaining SNPS after pruning
#### Evaluating OutFLANK with pruned data ####
out_trim <- OutFLANK(FstDataFrame[which_pruned,], NumberOfSamples=3)
head(out_trim$results)
## LocusName He FST T1 T2 FSTNoCorr
## 1 1_3190 0.02631111 -0.0106245867 -1.402192e-04 0.01319762 0.009626817
## 2 1_3203 0.08897778 0.0036302230 1.627347e-04 0.04482773 0.022694379
## 5 1_3311 0.02631111 0.0163152439 2.172200e-04 0.01331393 0.035663338
## 9 1_3340 0.03920000 0.0027356447 5.402162e-05 0.01974731 0.022401055
## 13 1_3376 0.15831111 0.0411934201 3.325202e-03 0.08072168 0.058098202
## 15 1_3424 0.24080000 0.0006484586 7.864950e-05 0.12128685 0.021967405
## T1NoCorr T2NoCorr meanAlleleFreq PlotPos Chrom indexOrder GoodH
## 1 0.0001270551 0.01319804 0.98666667 1 1 1 lowH
## 2 0.0010173685 0.04482910 0.04666667 2 1 2 lowH
## 5 0.0004748338 0.01331434 0.98666667 5 1 3 lowH
## 9 0.0004423745 0.01974793 0.98000000 9 1 4 lowH
## 13 0.0046899113 0.08072386 0.91333333 13 1 5 goodH
## 15 0.0026644483 0.12129099 0.86000000 15 1 6 goodH
## qvalues pvalues pvaluesRightTail OutlierFlag
## 1 0.9999584 0.4400478 0.7799761 FALSE
## 2 0.9999584 0.9743826 0.5128087 FALSE
## 5 0.9999584 0.6580625 0.3290313 FALSE
## 9 0.9999584 0.9642785 0.5178607 FALSE
## 13 0.9999584 0.2966123 0.1483061 FALSE
## 15 0.9999584 0.9491832 0.5254084 FALSE
OutFLANKResultsPlotter(out_trim, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
FALSE, RightZoomFraction = 0.15, titletext = NULL)

OutFLANKResultsPlotter(out_trim, withOutliers = TRUE,
NoCorr = TRUE, Hmin = 0.1, binwidth = 0.001, Zoom =
TRUE, RightZoomFraction = 0.10, titletext = NULL)

# Decent distribution fit, no trimming needed.
hist(out_trim$results$pvaluesRightTail,breaks = 20)

# still a conservative histogram, which is typical of outflank
top_candidates <- out_trim$results$qvalues<0.01
# outliers identified conservatively based on qvalue less than 0.01
Plotting
# Plot FST for all SNPs (pre-pruning)
plot(FstDataFrame$PlotPos, FstDataFrame$FST,
col=grey((FstDataFrame$Chrom%%2+1)/3),
pch=20, main = "Pre-pruning (with post-pruned outliers)")
points(out_trim$results$PlotPos[top_candidates], out_trim$results$FST[top_candidates],
pch=21, cex=2, col=2)

# Simple manhattan plot with only indexed SNPs
# Note this is does not represent positions on the contigs, simply a way of roughly visuallizing
# relative position of outlier SNPs at each of the three contigs
plot(out_trim$results$PlotPos, out_trim$results$FST,
col=grey(((locinames_update[which_pruned,]$a)%%2+1)/3),
pch=20, main="Post-pruning")
# Circle outlier SNPs
points(out_trim$results$PlotPos[top_candidates], out_trim$results$FST[top_candidates],
pch=21, cex=2, col=2)
