This is the R script for doing Bulk Segregant Analysis in maize shoepeg population taking cycle zero and cycle 1 as two bulk to map out plant height

setwd("C:/Users/Abiskar Gyawali/Desktop/BSA_Tall_short/")
newmbf = read.delim("Combined_cycle_0.txt", header=T)
lengths = read.table("maize.genom", header=T)

###create a vector of cosecutive lenght
consec_pos = matrix(nrow=length(newmbf[,1]), ncol=1)
consec_pos[(which(newmbf[,1] == "1")),] = newmbf[which(newmbf[,1] == "1"),2]

##loop throught the remaining chromosome

for(i in 2:10){
  chromo=paste(i,sep="")
  consec_pos[(which(newmbf[,1] ==chromo)),] =
    (newmbf[(which(newmbf[,1] == chromo)),2]) + (lengths[(i-1),3])
}


##attach this new vector to exisiting matrix
newmbf = cbind(newmbf, consec_pos)

colnames(newmbf)

##  [1] "chr"               "Pos"               "Number.of.Taxa"   
##  [4] "Ref"               "Alt"               "Cycle0_REF_Allele"
##  [7] "Cycle0_REF_count"  "Cycle0_REF_Freq"   "Cycle0_ALT_Allele"
## [10] "Cycle0_ALT_count"  "Cycle0_ALT_Freq"   "Cycle1_REF_Allele"
## [13] "Cycle1_REF_count"  "Cycle1_REF_Freq"   "Cycle1_ALT_Allele"
## [16] "Cycle1_ALT_count"  "Cycle1_ALT_Freq"   "consec_pos"

mbfPH <- newmbf[,c(1,2,4,5,7,10,8,11,13,16,14,17,18)]

mbfPH <- mbfPH[-(which((mbfPH[,5]+mbfPH[,6])<100)),]
mbfPH <- mbfPH[-(which((mbfPH[,9]+mbfPH[,10])<100)),]

mbfPH <- mbfPH[order(mbfPH[,"consec_pos"], decreasing=F),]

###computing Z stat

ztest_ph = matrix(nrow=length(mbfPH[,1]), ncol = 14)
colnames(ztest_ph) = c("Pos","B73.Freq.Diff","count1","count2","sample1",
                       "sample2","p_hat","Z","Avg_Z","testable_z", "P_value", "-log10(P_value)","chr","consec")
colnames(ztest_ph)

##  [1] "Pos"             "B73.Freq.Diff"   "count1"         
##  [4] "count2"          "sample1"         "sample2"        
##  [7] "p_hat"           "Z"               "Avg_Z"          
## [10] "testable_z"      "P_value"         "-log10(P_value)"
## [13] "chr"             "consec"

colnames(mbfPH)

##  [1] "chr"              "Pos"              "Ref"             
##  [4] "Alt"              "Cycle0_REF_count" "Cycle0_ALT_count"
##  [7] "Cycle0_REF_Freq"  "Cycle0_ALT_Freq"  "Cycle1_REF_count"
## [10] "Cycle1_ALT_count" "Cycle1_REF_Freq"  "Cycle1_ALT_Freq" 
## [13] "consec_pos"

ztest_ph[,1] = mbfPH[,2]
ztest_ph[,2] = mbfPH[,7]-mbfPH[,11]
ztest_ph[,3] = mbfPH[,5]
ztest_ph[,4] = mbfPH[,9]
ztest_ph[,5] = mbfPH[,5]+mbfPH[,6]
ztest_ph[,6] = mbfPH[,9]+mbfPH[,10]
ztest_ph[,7] = ((ztest_ph[,"count1"]+ztest_ph[,"count2"])/(ztest_ph[,"sample1"]+ztest_ph[,"sample2"]))
ztest_ph[,8] = (ztest_ph[,2])/(sqrt((ztest_ph[,7])*(1-(ztest_ph[,7]))*((1/ztest_ph[,"sample1"])+(1/ztest_ph[,"sample2"]))))
ztest_ph[(which(mbfPH[,1]=="1")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="1")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="2")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="2")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="3")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="3")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="4")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="4")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="5")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="5")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="6")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="6")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="7")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="7")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="8")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="8")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="9")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="9")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="10")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="10")),8]), width=15, FUN=mean, fill=NA)

ztest_ph[,10] =-(abs(ztest_ph[,9]))
ztest_ph[,11] = 2*(pnorm(ztest_ph[,10],mean=0,sd=1))
ztest_ph[,12] =-(log10(ztest_ph[,11]))
ztest_ph[,13] = as.character(mbfPH[,1])
ztest_ph[,14] = mbfPH[,13]

## Warning: Ignoring unknown parameters: NA

## Warning: Removed 155 rows containing missing values (geom_point).

Bulk Segregant Analysis

Abiskar

December 7, 2017

This is the R script for doing Bulk Segregant Analysis in maize shoepeg population taking cycle zero and cycle 1 as two bulk to map out plant height