johnlovell — Nov 24, 2014, 4:55 PM
library(qtl)
cross<-read.cross("csv",file="maize_9_1_2014.csv",
na.strings=c("NA"),
genotypes=c(0,2),
alleles=c("B73","CML228"),
crosstype="riself")
--Read the following data:
191 individuals
1105 markers
16 phenotypes
Warning: Some markers at the same position on chr 1,2,3,4,5,6,7,8,9,10;
use jittermap().
--Cross type: riself
cross2<-jittermap(cross) #best to do this if you have identical marker positions
#for publication, it is best to get the map nailed down beforehand.
#see https://github.com/jtlovell/r-QTL_functions/blob/master/cullmarkers.function
#I run this below... it retains only the best marker in each cM step of n size
require(RCurl)
Loading required package: RCurl
Loading required package: bitops
u <- "https://raw.githubusercontent.com/jtlovell/r-QTL_functions/master/cullmarkers.function"
script <- getURL(u, ssl.verifypeer = FALSE)
eval(parse(text = script))
cross<-cullmarkers(cross, step=1)#replace raw cross with culled cross
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
#part 1: diagnostics
#1.1 Map- where is data missing?
par(mfrow=c(2,1))
plot.missing(cross)
plot.info(cross)
#1.2 Map- any bad genotypes?
el<-calc.errorlod(cross, error.prob=0.001)
toperr<-top.errorlod(el, cutoff=3) #cross looks pretty good.
par(mfrow=c(3,2))
for(i in unique(toperr$chr)){plotGeno(el, chr=i, cutoff=3, ind=unique(toperr$id))}
print(toperr)
chr id marker errorlod
1 1 132 PZA03168.5 3.405
2 4 59 PZA01751.2 3.225
3 6 97 PZA03047.12 3.200
4 3 186 PHM15449.10 3.161
5 1 104 PZA00455.14/16 3.127
6 4 75 PZA03081.1 3.048
7 7 28 PZA03344.2 3.007
#If you want, you can remove these potential errors
cross.clean<-cross
for(i in 1:nrow(toperr)) {
chr <- toperr$chr[i]; id <- toperr$id[i]; mar <- toperr$marker[i]
cross.clean$geno[[chr]]$data[cross$pheno$id==id, mar] <- NA
}
cross<-cross.clean
#1.3 Map- any segregation distortion?
gt<-geno.table(cross)
gt$marker<-rownames(gt)
map<-pull.map(cross)
markers <- data.frame(chr = rep(names(map), sapply(map, length)),cm = unlist(map))
markers$marker<-markernames(cross)
gt.out<-merge(markers,gt, by=c("marker","chr"))
gt.out$propB73<-gt.out$B73B73/(gt.out$CML228CML228+gt.out$B73B73)
library(ggplot2)
ggplot(gt.out, aes(x=cm, y=propB73, color= P.value))+
geom_point()+
facet_wrap(~chr)
# there is some significant segregation distortion, but noting so bad that I would remove the markers
#perhaps something interesting on a few chromosomes to study selection (e.g. chr 1)
#1.4 Phenotypes- are they normal?
phenos<-pull.pheno(cross)
qtlphes<-phenames(cross)[4:16]
par(mfrow=c(2,2))
for(i in qtlphes){hist(phenos[,i], main=i)}
#these need to be normalized
tologtransform<-qtlphes[c(11,10,9)]
cross.log<-transformPheno(cross, pheno.col=tologtransform)
phenos<-pull.pheno(cross.log)
qtlphes<-phenames(cross.log)[4:16]
for(i in qtlphes){hist(phenos[,i], main=i)} # a little over transformed, but ok.
cross<-cross.log
# Part 2- 1-way qtlmapping
# 2.1 No covariates
cross<-calc.genoprob(cross, step=1, stepwidth="max", error.prob=0.00001, map.function="kosambi")
s1<-scanone(cross, method="hk",pheno.col=qtlphes,addcovar=NULL, intcovar=NULL)
Warning: Dropping 58 individuals with missing phenotypes.
par(mfrow=c(1,1))
maxlod<-max(apply(s1[,-c(1,2)], 2, max))
plot(s1,type="n", main="scanones, nocovariates", ylim=c(0,maxlod))
for (i in 1:length(qtlphes)){plot(s1, lodcolumn=i, add=T, col=i)}
# 2.2 Additive covariate
ac <- pull.pheno(cross, "P_code")
s1<-scanone(cross, method="hk",pheno.col=qtlphes,addcovar=ac,intcovar=NULL)
Warning: Dropping 58 individuals with missing phenotypes.
maxlod<-max(apply(s1[,-c(1,2)], 2, max))
plot(s1,type="n", main="scanones, additive covariates", ylim=c(0,maxlod))
for (i in 1:length(qtlphes)){plot(s1, lodcolumn=i, add=T, col=i)}
# 2.3 Interactive covariate
ac <- pull.pheno(cross, "P_code")
s1<-scanone(cross, method="hk",pheno.col=qtlphes,addcovar=ac,intcovar=ac)
Warning: Dropping 58 individuals with missing phenotypes.
maxlod<-max(apply(s1[,-c(1,2)], 2, max))
plot(s1,type="n", main="scanones, additive covariates", ylim=c(0,maxlod))
for (i in 1:length(qtlphes)){plot(s1, lodcolumn=i, add=T, col=i)}
#2.4permutations... on separate server
library(snow)
perms.nocov<-scanone(cross, method="hk",pheno.col=qtlphes,
addcovar=NULL, intcovar=NULL, n.cluster=12, n.perm=1000)
-Running permutations via a cluster of 12 nodes.
s1.nocov<-scanone(cross, method="hk",pheno.col=qtlphes,addcovar=NULL,intcovar=NULL)
Warning: Dropping 58 individuals with missing phenotypes.
perms.addcov<-scanone(cross, method="hk",pheno.col=qtlphes,
addcovar=ac, intcovar=NULL, n.cluster=12, n.perm=1000)
-Running permutations via a cluster of 12 nodes.
s1.addcov<-scanone(cross, method="hk",pheno.col=qtlphes,addcovar=ac,intcovar=NULL)
Warning: Dropping 58 individuals with missing phenotypes.
perms.intcov<-scanone(cross, method="hk",pheno.col=qtlphes,
addcovar=ac, intcovar=ac, n.cluster=12, n.perm=1000)
-Running permutations via a cluster of 12 nodes.
s1.intcov<-scanone(cross, method="hk",pheno.col=qtlphes,addcovar=ac,intcovar=ac)
Warning: Dropping 58 individuals with missing phenotypes.
#2.5 - Which traits have QTL?
u <- "https://raw.githubusercontent.com/jtlovell/r-QTL_functions/master/s1.qtlcheck"
script <- getURL(u, ssl.verifypeer = FALSE)
eval(parse(text = script))
allphes<-qtlphes
qtl.test<- s1.qtlcheck(s1=s1.nocov, s1.perms=perms.nocov,
s1.cov=s1.addcov, s1.perms.cov=perms.addcov,
s1.qn=s1.intcov, s1.perms.qn=perms.intcov,
alpha=0.1)
colnames(qtl.test)<-c("phenotype","nocov.thresh","nocov.max","qtl.nocov","addcov.thresh","addcov.max","qtl.addcov","intcov.thresh","intcov.max","qtl.intcov")
print(qtl.test)
phenotype nocov.thresh nocov.max qtl.nocov
1 leaf_area_35 2.78831354638633 3.29710904710419 TRUE
2 crown_whorls 2.81218350945347 1.69910795689105 FALSE
3 crown_roots 2.71378452912998 2.98640901614365 TRUE
4 shoot_wt 2.71234566382476 3.07076496986014 TRUE
5 root_wt1 2.81554194632083 2.4314567532893 FALSE
6 root_wt2 2.73990548485512 1.44359802969436 FALSE
7 root_wt3 2.77173080305011 2.12812411284739 FALSE
8 root_wt4 2.77735229231911 2.01591626714597 FALSE
9 root_wt5 2.74719847433705 1.18277539627358 FALSE
10 root_wt6 2.79019522129369 1.21819583625316 FALSE
11 crown_wt 2.69454105758703 2.03111579828268 FALSE
12 root_wt 2.76205112799507 1.85209357807094 FALSE
13 plant_wt 2.81908123698373 2.22500363085362 FALSE
addcov.thresh addcov.max qtl.addcov intcov.thresh
1 2.75920630958257 2.38094492541831 FALSE 3.36479483191886
2 2.78116979239925 2.4079367470777 FALSE 3.54184537703113
3 2.75535785520168 3.20815401720441 TRUE 3.3363764195824
4 2.64601023959003 2.69214712228163 TRUE 3.32360049293356
5 2.8161987011265 1.88633923419795 FALSE 3.34215988294871
6 2.80663155320675 1.89423295277297 FALSE 3.81878055004922
7 2.72689713382944 2.39387440789181 FALSE 3.38764763216439
8 2.67003092203467 2.46077929088369 FALSE 3.35094820443589
9 2.77820239713826 1.93192159125655 FALSE 3.6768922027325
10 2.78015503764933 2.08770397962027 FALSE 3.34912326995148
11 2.69341986617647 3.31042866624753 TRUE 3.66340085280628
12 2.8676592793169 1.91268155642911 FALSE 3.8681646257422
13 2.75756449123344 2.2856550336241 FALSE 3.82473421839877
intcov.max qtl.intcov
1 3.87915253221706 TRUE
2 2.78420093509969 FALSE
3 3.26035001118831 FALSE
4 2.70708864404327 FALSE
5 3.64018842931117 TRUE
6 3.14997612201617 FALSE
7 3.33221578709259 FALSE
8 3.06460431896292 FALSE
9 2.43905851638438 FALSE
10 2.4222061812294 FALSE
11 3.64218326838024 FALSE
12 3.49751468660904 FALSE
13 3.00384577979844 FALSE