This is the R script for doing Bulk Segregant Analysis in maize shoepeg population taking cycle zero and cycle 1 as two bulk to map out plant height
setwd("C:/Users/Abiskar Gyawali/Desktop/BSA_Tall_short/")
newmbf = read.delim("Combined_cycle_0.txt", header=T)
lengths = read.table("maize.genom", header=T)
###create a vector of cosecutive lenght
consec_pos = matrix(nrow=length(newmbf[,1]), ncol=1)
consec_pos[(which(newmbf[,1] == "1")),] = newmbf[which(newmbf[,1] == "1"),2]
##loop throught the remaining chromosome
for(i in 2:10){
chromo=paste(i,sep="")
consec_pos[(which(newmbf[,1] ==chromo)),] =
(newmbf[(which(newmbf[,1] == chromo)),2]) + (lengths[(i-1),3])
}
##attach this new vector to exisiting matrix
newmbf = cbind(newmbf, consec_pos)
colnames(newmbf)
## [1] "chr" "Pos" "Number.of.Taxa"
## [4] "Ref" "Alt" "Cycle0_REF_Allele"
## [7] "Cycle0_REF_count" "Cycle0_REF_Freq" "Cycle0_ALT_Allele"
## [10] "Cycle0_ALT_count" "Cycle0_ALT_Freq" "Cycle1_REF_Allele"
## [13] "Cycle1_REF_count" "Cycle1_REF_Freq" "Cycle1_ALT_Allele"
## [16] "Cycle1_ALT_count" "Cycle1_ALT_Freq" "consec_pos"
mbfPH <- newmbf[,c(1,2,4,5,7,10,8,11,13,16,14,17,18)]
mbfPH <- mbfPH[-(which((mbfPH[,5]+mbfPH[,6])<100)),]
mbfPH <- mbfPH[-(which((mbfPH[,9]+mbfPH[,10])<100)),]
mbfPH <- mbfPH[order(mbfPH[,"consec_pos"], decreasing=F),]
###computing Z stat
ztest_ph = matrix(nrow=length(mbfPH[,1]), ncol = 14)
colnames(ztest_ph) = c("Pos","B73.Freq.Diff","count1","count2","sample1",
"sample2","p_hat","Z","Avg_Z","testable_z", "P_value", "-log10(P_value)","chr","consec")
colnames(ztest_ph)
## [1] "Pos" "B73.Freq.Diff" "count1"
## [4] "count2" "sample1" "sample2"
## [7] "p_hat" "Z" "Avg_Z"
## [10] "testable_z" "P_value" "-log10(P_value)"
## [13] "chr" "consec"
colnames(mbfPH)
## [1] "chr" "Pos" "Ref"
## [4] "Alt" "Cycle0_REF_count" "Cycle0_ALT_count"
## [7] "Cycle0_REF_Freq" "Cycle0_ALT_Freq" "Cycle1_REF_count"
## [10] "Cycle1_ALT_count" "Cycle1_REF_Freq" "Cycle1_ALT_Freq"
## [13] "consec_pos"
ztest_ph[,1] = mbfPH[,2]
ztest_ph[,2] = mbfPH[,7]-mbfPH[,11]
ztest_ph[,3] = mbfPH[,5]
ztest_ph[,4] = mbfPH[,9]
ztest_ph[,5] = mbfPH[,5]+mbfPH[,6]
ztest_ph[,6] = mbfPH[,9]+mbfPH[,10]
ztest_ph[,7] = ((ztest_ph[,"count1"]+ztest_ph[,"count2"])/(ztest_ph[,"sample1"]+ztest_ph[,"sample2"]))
ztest_ph[,8] = (ztest_ph[,2])/(sqrt((ztest_ph[,7])*(1-(ztest_ph[,7]))*((1/ztest_ph[,"sample1"])+(1/ztest_ph[,"sample2"]))))
ztest_ph[(which(mbfPH[,1]=="1")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="1")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="2")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="2")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="3")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="3")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="4")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="4")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="5")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="5")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="6")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="6")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="7")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="7")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="8")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="8")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="9")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="9")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[(which(mbfPH[,1]=="10")),9]=rollapply(data=as.numeric(ztest_ph[(which(mbfPH[,1]=="10")),8]), width=15, FUN=mean, fill=NA)
ztest_ph[,10] =-(abs(ztest_ph[,9]))
ztest_ph[,11] = 2*(pnorm(ztest_ph[,10],mean=0,sd=1))
ztest_ph[,12] =-(log10(ztest_ph[,11]))
ztest_ph[,13] = as.character(mbfPH[,1])
ztest_ph[,14] = mbfPH[,13]
## Warning: Ignoring unknown parameters: NA
## Warning: Removed 155 rows containing missing values (geom_point).
