1 Packages
2 Genotype simulation
2.1 RP
- random population
# h2 = as.numeric(arg[1]) # heritability
# s = as.numeric(arg[2]) # scenario
h2 = 0.5
s = 3
count = 20
m = 300
n = 200
Ga = matrix(NA, n, m)
Gd = matrix(0, n, m)
freq = runif(m, 0.05, 0.5)
for(i in 1:m)
{
Ga[,i] = rbinom(n, 2, freq[i])-1
Gd[which(Ga[,i]==0),i] = 1
}
# Additive(-1,0,1)
# Dominance(0,1,0)
# scaling by column
# Ga = as.matrix(apply(Ga, 2, scale))
# Gd = as.matrix(apply(Gd, 2, scale))
colnames(Gd) = colnames(Ga) = paste0("SNP",1:ncol(Ga))
rownames(Gd) = rownames(Ga) = paste0("GEO",1:nrow(Ga))3 Phenotype Simulation
3.1 (0-1)
m_l = round(0.01*m); m_s = m-m_l;
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
sigma_l = 0.1
sigma_s = 0.003
# X matrix
X_l = as.matrix(Ga[,site_l])
X_s = as.matrix(Ga[,site_s])
# g and y
y = matrix(NA, n, count)
for (t in 1:count)
{
b_l = rnorm(m_l, sd=sqrt(sigma_l))
b_s = rnorm(m_s, sd=sqrt(sigma_s))
g = X_l %*% b_l + X_s %*% b_s
sigma_e = (1/h2-1)*var(g)
error = rnorm(n, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = apply(y, 2, function(x) (x-min(x))/(max(x)-min(x)))
thrd = 0.8
y[which(y>=thrd)] = 1
y[which(y<thrd)] = 0
y = as.data.frame(y, row.names=rownames(Ga))
colnames(y) = paste0("Trait",1:count)
y$id = as.factor(rownames(y))3.2 (A)
# number and site
# Scenario option
if(s==1){ # Scenario I
m_l = 0; m_s = m ;
site_s = seq(1,m)
sigma_s = 0.001
}else if(s==2){ # Scenario II
m_l = round(0.01*m); m_s = m-m_l;
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
sigma_l = 0.1
sigma_s = 0.003
}else if(s==3){ # Scenario III
m_l = round(0.01*m); m_s = m-m_l;
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
sigma_l = 0.1
sigma_s = 0.001
}else if(s==4){ # Scenario IV
m_l = round(0.01*m); m_s = 0;
site_l = sample.int(m, m_l)
sigma_l = 0.1
}else{
print("Undefined scenario!")
}
# X matrix
if(m_l>0) X_l = as.matrix(Ga[,site_l])
if(m_s>0) X_s = as.matrix(Ga[,site_s])
# g and y
y = matrix(NA, n, count)
for (t in 1:count)
{
if(m_l>0) b_l = rnorm(m_l, sd=sqrt(sigma_l))
if(m_s>0) b_s = rnorm(m_s, sd=sqrt(sigma_s))
if(m_l>0 && m_s>0){
g = X_l %*% b_l + X_s %*% b_s
} else if(m_l==0){
g = X_s %*% b_s
} else if(m_s==0){
g = X_l %*% b_l
}
sigma_e = (1/h2-1)*var(g)
error = rnorm(n, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = scale(y) # scale by column
y = as.data.frame(y, row.names=rownames(Ga))
colnames(y) = paste0("Trait",1:count)
y$id = as.factor(rownames(y))3.3 (AD)
# number and site
# Scenario option
if(s==1){ # Scenario I
m_l = 0
m_s = m
site_s = seq(1,m)
sigmaA_s = 0.001
sigmaD_s = 0.0005
}else if(s==2){ # Scenario II
m_l = round(0.01*m)
m_s = m-m_l
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
sigmaA_l = 0.1
sigmaD_l = 0.05
sigmaA_s = 0.003
sigmaD_s = 0.0015
}else if(s==3){ # Scenario III
m_l = round(0.01*m)
m_s = m-m_l
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
sigmaA_l = 0.1
sigmaD_l = 0.05
sigmaA_s = 0.001
sigmaD_s = 0.0005
}else if(s==4){ # Scenario IV
m_l = round(0.01*m)
m_s = 0
site_l = sample.int(m, m_l)
sigmaA_l = 0.1
sigmaD_l = 0.05
}else{
print("Undefined scenario!")
}
# X matrix
if(m_l>0) {
Xa_l = as.matrix(Ga[,site_l])
Xd_l = as.matrix(Gd[,site_l])
}
if(m_s>0) {
Xa_s = as.matrix(Ga[,site_s])
Xd_s = as.matrix(Gd[,site_s])
}
# g and y
y = matrix(NA, n, count)
for (t in 1:count)
{
if(m_l>0) {
ba_l = rnorm(m_l, sd=sqrt(sigmaA_l))
bd_l = rnorm(m_l, sd=sqrt(sigmaD_l))
}
if(m_s>0) {
ba_s = rnorm(m_s, sd=sqrt(sigmaA_s))
bd_s = rnorm(m_s, sd=sqrt(sigmaD_s))
}
if(m_l>0 && m_s>0){
g = Xa_l %*% ba_l + Xa_s %*% ba_s + Xd_l %*% bd_l + Xd_s %*% bd_s
} else if(m_l==0){
g = Xa_s %*% ba_s + Xd_s %*% bd_s
} else if(m_s==0){
g = Xa_l %*% ba_l + Xd_l %*% bd_l
}
sigma_e = (1/h2-1)*var(g)
error = rnorm(n, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = scale(y) # scale by column
y = as.data.frame(y)
colnames(y) = paste0("Trait",1:count)
rownames(y) = paste0("obs",1:n)
y$obs = as.factor(paste0("obs",1:n))
y$var = as.factor(rownames(Ga))3.4 (ADAA)
# number and site
m_l = round(0.01*m)
m_s = m-m_l
m_ll = m_l # number of AA effect pairs
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
site_ll = matrix(c(site_l, sample(seq(1,m)[-site_l], m_ll)), 2, m_ll,byrow = T)
sigmaA_l = 0.1
sigmaD_l = 0.05
sigmaA_s = 0.003
sigmaD_s = 0.0015
sigmaAA = 0.08
# X matrix
Xa_l = as.matrix(Ga[,site_l])
Xd_l = as.matrix(Gd[,site_l])
Xa_s = as.matrix(Ga[,site_s])
Xd_s = as.matrix(Gd[,site_s])
Xaa = as.matrix(Ga[,site_ll[1,]]) * as.matrix(Ga[,site_ll[2,]])
# g and y
y = matrix(NA, n, count)
for (t in 1:count)
{
ba_l = rnorm(m_l, sd=sqrt(sigmaA_l))
bd_l = rnorm(m_l, sd=sqrt(sigmaD_l))
ba_s = rnorm(m_s, sd=sqrt(sigmaA_s))
bd_s = rnorm(m_s, sd=sqrt(sigmaD_s))
baa_ll = rnorm(m_ll, sd=sqrt(sigmaAA))
g = Xa_l %*% ba_l + Xa_s %*% ba_s + Xd_l %*% bd_l + Xd_s %*% bd_s + Xaa %*% baa_ll
sigma_e = (1/h2-1)*var(g)
error = rnorm(n, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = scale(y) # scale by column
y = as.data.frame(y)
colnames(y) = paste0("Trait",1:count)
rownames(y) = paste0("obs",1:n)
y$obs = as.factor(paste0("obs",1:n))
y$var = as.factor(rownames(Ga))3.5 (AE1)
env = 2
# number and site
m_l = round(0.01*m)
m_s = m-m_l
m_l_env = round(0.01*m) # 0.01,0.1,1
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
site_l_env = sample.int(m, m_l_env)
# variance
sigma_env = 0.01
sigma_l = 0.1
sigma_s = 0.001
sigma_l_env = 0.8 # 0.2,0.5,0.8
# X matrix
if(m_l>0) X_l = as.matrix(Ga[,site_l])
if(m_s>0) X_s = as.matrix(Ga[,site_s])
if(m_l_env>0) X_l_env = as.matrix(Ga[,site_l_env])
# g and y
y = matrix(NA, n*env, count)
for(t in 1:count)
{
# effect
b_env = rnorm(env, sd=sqrt(sigma_env))
b_l = rnorm(m_l, sd=sqrt(sigma_l))
b_s = rnorm(m_s, sd=sqrt(sigma_s))
b_l_env = list()
b_l_env[[1]] = rnorm(m_l_env, sd=sqrt(sigma_l_env))
b_l_env[[2]] = -b_l_env[[1]]
g = list()
g0 = X_l %*% b_l + X_s %*% b_s
for(e in 1:env)
{
g[[e]] = g0 + b_env + X_l_env %*% b_l_env[[e]]
}
g = unlist(g)
sigma_e = (1/h2-1)*var(g)
error = rnorm(n*env, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = scale(y) # scale by column
y = as.data.frame(y)
colnames(y) = paste0("Trait",1:count)
rownames(y) = paste0("obs",1:(n*env))
y$obs = as.factor(paste0("obs",1:(n*env)))
y$var = as.factor(rep(rownames(Ga), env))
y$env = as.factor(rep(paste0("env",1:env), each=n))3.6 (AE2)
env = 3
# number and site
m_l = round(0.01*m)
m_s = m-m_l
m_l_env = round(0.01*m) # 0.01,0.1,1
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
site_l_env = sample.int(m, m_l_env)
# variance
sigma_env = 0.01
sigma_l = 0.1
sigma_s = 0.001
# sigma_l_env_main = 0.01
sigma_l_env_spec = runif(m_l_env, 0.1, 0.2)
# X matrix
if(m_l>0) X_l = as.matrix(Ga[,site_l])
if(m_s>0) X_s = as.matrix(Ga[,site_s])
if(m_l_env>0) X_l_env = as.matrix(Ga[,site_l_env])
# g and y
y = matrix(NA, n*env, count)
for(t in 1:count)
{
# effect
b_env = rnorm(env, sd=sqrt(sigma_env))
b_l = rnorm(m_l, sd=sqrt(sigma_l))
b_s = rnorm(m_s, sd=sqrt(sigma_s))
# b_l_env = matrix(rep(rnorm(m_l_env, sd=sqrt(sigma_l_env_main)),env), env, m_l_env, byrow = T)
b_l_env = apply(as.matrix(sigma_l_env_spec), 1, function(x) rnorm(env-1, sd=sqrt(x)))
b_l_env = rbind(b_l_env, -colSums(b_l_env))
g = list()
g0 = X_l %*% b_l + X_s %*% b_s
for(e in 1:env)
{
g[[e]] = g0 + b_env[e] + X_l_env %*% b_l_env[e,]
}
g = unlist(g)
sigma_e = (1/h2-1)*var(g)
error = rnorm(n*env, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = scale(y) # scale by column
y = as.data.frame(y)
colnames(y) = paste0("Trait",1:count)
rownames(y) = paste0("obs",1:(n*env))
y$obs = as.factor(paste0("obs",1:(n*env)))
y$var = as.factor(rep(rownames(Ga), env))
y$env = as.factor(rep(paste0("env",1:env), each=n))3.7 (ADE)
env = 2
# number and site
m_l = round(0.01*m)
m_s = m-m_l
m_l_env = round(0.01*m) # 0.01,0.1,1
site_l = sample.int(m, m_l)
site_s = seq(1,m)[-site_l]
site_l_env = sample.int(m, m_l_env)
# variance
sigma_env = 0.01
sigmaA_l = 0.1
sigmaD_l = 0.05
sigmaA_s = 0.001
sigmaD_s = 0.0005
sigmaA_l_env = 0.5 # 0.2,0.5,0.8
sigmaD_l_env = 0.2
# X matrix
Xa_l = as.matrix(Ga[,site_l])
Xd_l = as.matrix(Gd[,site_l])
Xa_s = as.matrix(Ga[,site_s])
Xd_s = as.matrix(Gd[,site_s])
Xa_l_env = as.matrix(Ga[,site_l_env])
Xd_l_env = as.matrix(Gd[,site_l_env])
# g and y
y = matrix(NA, n*env, count)
for(t in 1:count)
{
# effect
b_env = rnorm(env, sd=sqrt(sigma_env))
ba_l = rnorm(m_l, sd=sqrt(sigmaA_l))
bd_l = rnorm(m_l, sd=sqrt(sigmaD_l))
ba_s = rnorm(m_s, sd=sqrt(sigmaA_s))
bd_s = rnorm(m_s, sd=sqrt(sigmaD_s))
ba_l_env = list()
bd_l_env = list()
ba_l_env[[1]] = rnorm(m_l_env, sd=sqrt(sigmaA_l_env))
ba_l_env[[2]] = -ba_l_env[[1]]
bd_l_env[[1]] = rnorm(m_l_env, sd=sqrt(sigmaD_l_env))
bd_l_env[[2]] = -bd_l_env[[1]]
g = list()
g0 = Xa_l %*% ba_l + Xa_s %*% ba_s + Xd_l %*% bd_l + Xd_s %*% bd_s
for(e in 1:env)
{
g[[e]] = g0 + b_env + Xa_l_env %*% ba_l_env[[e]] + Xd_l_env %*% bd_l_env[[e]]
}
g = unlist(g)
sigma_e = (1/h2-1)*var(g)
error = rnorm(n*env, sd=sqrt(sigma_e))
y[,t] = g + error
}
y = scale(y) # scale by column
y = as.data.frame(y)
colnames(y) = paste0("Trait",1:count)
rownames(y) = paste0("obs",1:(n*env))
y$obs = as.factor(paste0("obs",1:(n*env)))
y$var = as.factor(rep(rownames(Ga), env))
y$env = as.factor(rep(paste0("env",1:env), each=n))4 (A) Models
4.1 1-GBLUP
print("============== GBLUP ===============")
# Additive relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
# repeats = count
mod1 = list()
pre1 = matrix(NA, count, 1)
for(c in 1:count)
{
dt = y[,c(c,count+1)]
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(Ga), round(nrow(Ga)/5))
dt[cv, 1] = NA
## GBLUP
mod1[[c]] = mmer(X1~1,
random = ~vs(id, Gu=A),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
# Breeding value
BV = data.frame(rep(mod1[[c]]$Beta$Estimate, n) +
rep(mod1[[c]]$U$`u:id`$X1) ,
y[order(y$id),c(c,count+1)])
pre1[c,1] = cor(BV[cv,1],BV[cv,2])^2
## rrBLUP
# mod2[[c]] = mmer(X1~1,
# random=~vs(list(Ga)),
# rcov=~units,
# data=dt,
# verbose = FALSE, date.warning = FALSE)
# u = Ga %*% as.matrix(mod2[[c]]$U$`u:Ga`$X1) # BLUPs for individuals
# rownames(u) = rownames(Ga)
# pre = cor(u[cv,], y[cv,c])
# pre2 = c(pre2, pre)
#print(c)
}
colnames(pre1) = "GBLUP"4.2 2-MMIBLUP
t0=Sys.time()
print("============== MMIBLUP ===============")
mod2 = list()
pre2 = matrix(NA, count, 3)
# additive relationship matrix
A = A.mat(Ga)
for(c in 1:count)
{
# GWAS
dt = y[,c(c,count+1)] # Trait c and id
colnames(dt)[1] = "X1"
mod0 = GWAS(X1~1,
random = ~vs(id, Gu=A),
rcov = ~units,
data = dt,
M = Ga,
gTerm = "u:id",
verbose = FALSE, date.warning = FALSE)
site_qtl = which(mod0$scores>-log10(0.05/m)) # bonferroni
m_qtl = length(site_qtl)
# sampling training population
cv = sample(rownames(Ga), round(nrow(Ga)/5))
dt[cv, 1] = NA
if(m_qtl>0) {
# adjusted additive relationship matrix
Ka = A.mat(Ga[,-site_qtl])
colnames(Ka) = rownames(Ka) = rownames(Ga)
# include major effect SNP in data.frame dt
dt = cbind(dt, Ga[,site_qtl])
xname = colnames(dt)[3:ncol(dt)] = paste0("F",1:m_qtl)
# prepare fixed effect formula for sommer
fmla = reformulate(xname, "X1")
Xfix = dt[,-c(1:2)]
X = Ga[,-site_qtl]
} else {
Ka = A
fmla = reformulate("1","X1")
Xfix = rep(1, nrow(y))
X = Ga
}
# MMIBLUP
mod2[[c]] = mmer(fmla,
random = ~vs(id, Gu=Ka),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
# Breeding value
fix = data.frame(cbind(rep(1,n), Ga[,site_qtl]) %*% mod2[[c]]$Beta$Estimate,
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),]+mod2[[c]]$U$`u:id`$X1,
y[order(y$id),c(c,count+1)])
pre2[c,1] = cor(BV[cv,1],BV[cv,2])^2
nIter=6000; burnIn=1000
# MMIBayesA
fmMM = BGLR(y=dt[,1],ETA=list(list(X=Xfix, model="FIXED"), list(X=X, model="BayesA")),
nIter=nIter,burnIn=burnIn, verbose = F)
yhat=matrix(fmMM$yHat, length(fmMM$yHat),1)
rownames(yhat) = rownames(y)
pre2[c,2] = cor(y[cv,c], yhat[cv,1])^2
# MMIBayesB
fmMM = BGLR(y=dt[,1],ETA=list(list(X=Xfix, model="FIXED"), list(X=X, model="BayesB")),
nIter=nIter,burnIn=burnIn, verbose = F)
yhat=matrix(fmMM$yHat, length(fmMM$yHat),1)
rownames(yhat) = rownames(y)
pre2[c,3] = cor(y[cv,c], yhat[cv,1])^2
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}
colnames(pre2) = c("MMIBLUP","MMIBayesA","MMIBayesB")4.3 3-Bayes
t0=Sys.time()
print("============== Bayes Alphabet ===============")
pre3 = matrix(NA, count, 5)
for(c in 1:count)
{
y0 = y[,c]
cv = sample(1:length(y0), round(length(y0)/5))
y0[cv] = NA
nIter=6000; burnIn=1000
# RR-BLUP
fmBRR=BGLR(y=y0,ETA=list( list(X=Ga,model='BRR')),
nIter=nIter,burnIn=burnIn, verbose = F)
pre3[c,1]=cor(y[cv,c], fmBRR$yHat[cv])^2
# Bayes A
fmBA=BGLR(y=y0,ETA=list( list(X=Ga,model='BayesA')),
nIter=nIter,burnIn=burnIn, verbose = F)
pre3[c,2]=cor(y[cv,c], fmBA$yHat[cv])^2
# Bayes Lasso
fmBL=BGLR(y=y0,ETA=list( list(X=Ga,model='BL')),
nIter=nIter,burnIn=burnIn, verbose = F)
pre3[c,3]=cor(y[cv,c], fmBL$yHat[cv])^2
# Bayes C
fmBC=BGLR(y=y0,ETA=list( list(X=Ga,model='BayesC')),
nIter=nIter,burnIn=burnIn, verbose = F)
pre3[c,4]=cor(y[cv,c], fmBC$yHat[cv])^2
# Bayes B
fmBB=BGLR(y=y0,ETA=list( list(X=Ga,model='BayesB')),
nIter=nIter,burnIn=burnIn, verbose = F)
pre3[c,5]=cor(y[cv,c], fmBB$yHat[cv])^2
# Bayes RHKS
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}
colnames(pre3) = c("BayesRR","BayesA","BayesL","BayesC","BayesB")5 (AD) Models
5.1 1-GBLUP-ad
t0=Sys.time()
print("============== GBLUP-ad ===============")
# Additive and Dominance relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
D = D.mat(Ga)
colnames(D) = rownames(D) = rownames(Ga)
mod1 = list()
pre1 = c()
for(c in 1:count)
{
dt = y[,c(c,count+1,count+2,count+2)]
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
## GBLUP
mod1[[c]] = mmer(X1~1,
random = ~vs(var, Gu=A) + vs(var.1, Gu=D),
rcov = ~units,
data = dt,
na.method.Y = "include", #impute y with the median value
verbose = FALSE, date.warning = FALSE)
# Breeding value
BV = data.frame(rep(mod1[[c]]$Beta$Estimate, n) +
rep(mod1[[c]]$U$`u:var`$X1) +
rep(mod1[[c]]$U$`u:var.1`$X1),
y[order(y$var),c(c,count+1)],
row.names = y[order(y$var),count+1])
pre = cor(BV[cv,1],BV[cv,2])^2
pre1 = c(pre1, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}5.2 2-MMIBLUP-ad
t0=Sys.time()
print("============== MMIBLUP-ad ===============")
# Additive and Dominance relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
D = D.mat(Ga)
colnames(D) = rownames(D) = rownames(Ga)
mod2 = list()
pre2 = c()
for(c in 1:count)
{
# GWAS
dt = y[,c(c,count+1, count+2, count+2)] # Trait c obs, var and var
colnames(dt)[1] = "X1"
mod0 = GWAS(X1~1,
random = ~vs(var, Gu=A) + vs(var.1, Gu=D),
rcov = ~units,
data = dt,
M = Ga,
gTerm = "u:var",
verbose = FALSE, date.warning = FALSE)
site_qtl = which(mod0$scores>-log10(0.05/m)) # bonferroni
m_qtl = length(site_qtl)
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
if(m_qtl>0) {
# adjusted additive and dominance relationship matrix
Ka = A.mat(Ga[,-site_qtl])
Kd = D.mat(Ga[,-site_qtl])
colnames(Ka) = rownames(Ka) = rownames(Ga)
colnames(Kd) = rownames(Kd) = rownames(Ga)
# include major effect SNP in data.frame dt
dt = cbind(dt, Ga[,site_qtl], Gd[,site_qtl])
xname = colnames(dt)[5:ncol(dt)] = paste0("F",1:(2*m_qtl))
# prepare fixed effect formula for sommer
fmla = reformulate(xname, "X1")
} else {
Ka = A
Kd = D
fmla = reformulate("1","X1")
}
# Full Model
mod2[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(var.1, Gu=Kd),
rcov = ~units,
data = dt,
na.method.Y = "include", #impute y with the median value
verbose = FALSE, date.warning = FALSE)
# Breeding value
# fix = data.frame(cbind(rep(1,n), Ga[,site_qtl], Gd[,site_qtl]) %*% mod2[[c]]$Beta$Estimate,
# row.names = rownames(dt))
fix = data.frame(mod2[[c]]$fitted,row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),] +
mod2[[c]]$U$`u:var`$X1 +
mod2[[c]]$U$`u:var.1`$X1,
y[order(y$var),c(c,count+1)],
row.names = y[order(y$var),count+1])
pre = cor(BV[cv,1],BV[cv,2])^2
pre2 = c(pre2, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}6 (ADAA) Models
6.1 1-GBLUP-adaa
t0=Sys.time()
print("============== GBLUP-adaa ===============")
# Additive, Dominance and AA-epi relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
D = D.mat(Ga)
colnames(D) = rownames(D) = rownames(Ga)
AA = E.mat(Ga, type="A#A")
colnames(AA) = rownames(AA) = rownames(Ga)
mod1 = list()
pre1 = c()
for(c in 1:count)
{
dt = y[,c(c,count+1,count+2,count+2,count+2)]
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
## GBLUP
mod1[[c]] = mmer(X1~1,
random = ~vs(var, Gu=A) + vs(var.1, Gu=D) + vs(var.2, Gu=AA) ,
rcov = ~units,
data = dt,
na.method.Y = "include", #impute y with the median value
verbose = FALSE, date.warning = FALSE)
# Breeding value
BV = data.frame(rep(mod1[[c]]$Beta$Estimate, n) +
rep(mod1[[c]]$U$`u:var`$X1) +
rep(mod1[[c]]$U$`u:var.1`$X1) +
rep(mod1[[c]]$U$`u:var.2`$X1),
y[order(y$var),c(c,count+1)],
row.names = y[order(y$var),count+1])
pre = cor(BV[cv,1],BV[cv,2])^2
pre1 = c(pre1, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}6.2 2-MMIBLUP-adaa
t0=Sys.time()
print("============== MMIBLUP-adaa ===============")
# Additive, Dominance and AA-epi relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
D = D.mat(Ga)
colnames(D) = rownames(D) = rownames(Ga)
AA = E.mat(Ga, type="A#A")
colnames(AA) = rownames(AA) = rownames(Ga)
mod2 = list()
pre2 = c()
for(c in 1:count)
{
## GWAS
dt = y[,c(c,count+1, count+2, count+2, count+2)] # Trait c obs, var, var and var
colnames(dt)[1] = "X1"
## sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
mod0 = GWAS(X1~1,
random = ~vs(var, Gu=A) + vs(var.1, Gu=D) + vs(var.2, Gu=AA),
rcov = ~units,
data = dt,
M = Ga,
gTerm = "u:var",
verbose = FALSE, date.warning = FALSE)
site_qtl = which(mod0$scores>-log10(0.05/m) & mod0$scores != Inf) # bonferroni
m_qtl = length(site_qtl)
if(m_qtl>0) {
# adjusted additive and dominance relationship matrix
Ka = A.mat(Ga[,-site_qtl])
Kd = D.mat(Ga[,-site_qtl])
colnames(Ka) = rownames(Ka) = rownames(Ga)
colnames(Kd) = rownames(Kd) = rownames(Ga)
# include major effect SNP in data.frame dt
dt = cbind(dt, Ga[,site_qtl], Gd[,site_qtl])
xname = colnames(dt)[6:ncol(dt)] = paste0("F",1:(2*m_qtl))
# prepare fixed effect formula for sommer
fmla = reformulate(xname, "X1")
## GWAS-2D
site_qtl_2D = list()
m_qtl_2D = matrix(0, 1, m_qtl)
for(i in 1:m_qtl)
{
M = Ga[,site_qtl[i]]*Ga
mod0 = GWAS(fmla,
random = ~vs(var, Gu=Ka) + vs(var.1, Gu=Kd) + vs(var.2, Gu=AA),
rcov = ~units,
data = dt,
M = M,
gTerm = "u:var",
verbose = FALSE, date.warning = FALSE)
site_qtl_2D[[i]] = which(mod0$scores>-log10(0.05/m))
site_qtl_2D[[i]] = site_qtl_2D[[i]][-which(site_qtl_2D[[i]]==site_qtl[i])]
m_qtl_2D[i] = length(site_qtl_2D[[i]])
## add AA effect to fixed-effect fmla
if(m_qtl_2D[i]>0)
{
dt = cbind(dt, Ga[,site_qtl[[i]]]*Ga[,site_qtl_2D[[i]]])
xname = colnames(dt)[6:ncol(dt)] = paste0("F",1:(2*m_qtl+m_qtl_2D[i]))
fmla = reformulate(xname, "X1")
}
}
print(sum(m_qtl_2D))
print(site_qtl_2D)
} else {
Ka = A
Kd = D
fmla = reformulate("1","X1")
}
# Full Model
mod2[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(var.1, Gu=Kd) + vs(var.2, Gu=AA),
rcov = ~units,
data = dt,
na.method.Y = "include", #impute y with the median value
verbose = FALSE, date.warning = FALSE)
# Breeding value
fix = data.frame(mod2[[c]]$fitted,row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),] +
mod2[[c]]$U$`u:var`$X1 +
mod2[[c]]$U$`u:var.1`$X1 +
mod2[[c]]$U$`u:var.2`$X1 ,
y[order(y$var),c(c,count+1)],
row.names = y[order(y$var),count+1])
pre = cor(BV[cv,1],BV[cv,2])^2
pre2 = c(pre2, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}7 (AE1) Models
7.1 1-GBLUP-gxe
t0=Sys.time()
print("============== GBLUP-gxe ===============")
# Additive relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
E = diag(length(unique(y$env)))
rownames(E) = colnames(E) = unique(y$env)
EA = kronecker(E,A, make.dimnames = TRUE)
mod1 = list()
pre1 = c()
for(c in 1:count)
{
dt = y[,c(c,count+2,count+3)]
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
# Full Model1 (y = miu + Zu_g + Zu_e + Zu_ge + e)
mod1[[c]] = mmer(X1~1,
random = ~vs(var, Gu=A) + vs(env, Gu=E) + vs(env:var, Gu=EA),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
# Breeding value
BV = data.frame(rep(mod1[[c]]$Beta$Estimate, n*env) +
rep(mod1[[c]]$U$`u:var`$X1, env) +
rep(mod1[[c]]$U$`u:env`$X1, each = n) +
mod1[[c]]$U$`u:env:var`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre = cor(BV[cv,1],BV[cv,2])^2
pre1 = c(pre1, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}7.2 2-MMIBLUP-gxe
t0=Sys.time()
print("============== GWAS + (GWAS-gxe) + MMIBLUP-gxe ===============")
mod2 = list()
pre2 = c()
mod3 = list()
pre3 = c()
pwr_g = c()
pwr_ge = c()
# Additive relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
E = diag(length(unique(y$env)))
rownames(E) = colnames(E) = unique(y$env)
EA = kronecker(E,A, make.dimnames = TRUE)
for(c in 1:count)
{
#############################################################
# Form data.frame
dt = y[,c(c,count+2,count+3)] # Trait c, var, and env
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
##############################################################
# GWAS
mod0 = GWAS(X1~1,
random = ~ vs(var, Gu=A) + vs(env, Gu=E) + vs(ds(env), var, Gu=A),
rcov = ~units,
data = dt,
M = Ga,
gTerm = "u:var",
verbose = FALSE, date.warning = FALSE)
site_qtl = which(mod0$scores>-log10(0.05/m)) # bonferroni
print(site_qtl)
m_qtl = length(site_qtl)
pwr_g = c(pwr_g, round(length(which(site_qtl %in% site_l))/m_l,3))
###############################################################
# GWASgxe
site_env_qtl = c()
for(e in 1:env)
{
gterm = paste0(unique(y$env)[e],":var")
mod0 = GWAS(X1~1,
random = ~ vs(var, Gu=A) + vs(env, Gu=E) + vs(ds(env), var, Gu=A),
rcov = ~ units,
data = dt,
M = Ga,
gTerm = gterm,
verbose = FALSE, date.warning = FALSE)
site_env_qtl = c(site_env_qtl,which(mod0$scores>-log10(0.05/m))) # bonferroni
}
site_env_qtl = unique(site_env_qtl)
print(site_env_qtl)
m_env_qtl = length(site_env_qtl)
pwr_ge = c(pwr_ge, round(length(which(site_env_qtl %in% site_l_env))/m_l_env,3))
###############################################################
# reform variance matrix with GWAS-QTL (Ka)
# include major QTL in data.frame as fixed effects
if(m_qtl>0) {
# adjusted additive relationship matrix
Ka = A.mat(Ga[,-site_qtl])
colnames(Ka) = rownames(Ka) = rownames(Ga)
# include major effect SNP in data.frame dt
dt = cbind(dt, Ga[,site_qtl])
xname = colnames(dt)[4:ncol(dt)] = paste0("F",1:m_qtl)
# prepare fixed effect formula for sommer
fmla = reformulate(xname, "X1")
} else {
Ka = A
fmla = reformulate("1","X1")
}
###############################################################
# reform variance matrix with GWAS-gxe-QTL (EKa.m)
if(m_env_qtl>0){
if(m_env_qtl==1)
{
tmp = Ga[,site_env_qtl] %*% t(Ga[,site_env_qtl])
d = mean(diag(tmp))
Ka.m = tmp / d
}else{
Ka.m = A.mat(Ga[,site_env_qtl])
}
colnames(Ka.m) = rownames(Ka.m) = rownames(Ga)
EKa.m = kronecker(E,Ka.m, make.dimnames = TRUE)
}
else{
EKa.m = EA
}
###############################################################
# Full Model2 (y = miu + Xb(QTL) + Zu_g(-QTL) + Zu_e + Zu_ge + e)
mod2[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(env, Gu=E) + vs(env:var, Gu=EA),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
fix = data.frame(rep(cbind(rep(1,n), Ga[,site_qtl]) %*% mod2[[c]]$Beta$Estimate, env),
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),]+
rep(mod2[[c]]$U$`u:var`$X1, env) +
rep(mod2[[c]]$U$`u:env`$X1, each = n) +
mod2[[c]]$U$`u:env:var`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre = cor(BV[cv,1],BV[cv,2])^2
pre2 = c(pre2, pre)
###############################################################
# Full Model3 (y = miu + Xb(QTL) + Zu_g(-QTL) + Zu_e + Zu_ge(gxe-QTL) + e)
mod3[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(env, Gu=E) + vs(env:var, Gu=EKa.m),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
fix = data.frame(rep(cbind(rep(1,n), Ga[,site_qtl]) %*% mod3[[c]]$Beta$Estimate, env),
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),]+
rep(mod3[[c]]$U$`u:var`$X1, env) +
rep(mod3[[c]]$U$`u:env`$X1, each = n) +
mod3[[c]]$U$`u:env:var`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre = cor(BV[cv,1],BV[cv,2])^2
pre3 = c(pre3, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}8 (AE2) Models
8.1 1-GBLUP-gxe
t0=Sys.time()
print("============== GBLUP-gxe ===============")
# Additive relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
E = diag(length(unique(y$env)))
rownames(E) = colnames(E) = unique(y$env)
EA = kronecker(E, A,make.dimnames = TRUE)
mod1 = list()
pre1 = matrix(NA, count, 1)
for(c in 1:count)
{
dt = y[,c(c,count+2,count+3)]
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
# Full Model1 (y = miu + Zu_g + Zu_e + Zu_ge + e)
mod1[[c]] = mmer(X1~1,
random = ~vs(var, Gu=A) + vs(env, Gu=E) + vs(env:var, Gu=EA),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
# Breeding value
BV = data.frame(rep(mod1[[c]]$Beta$Estimate, n*env) +
rep(mod1[[c]]$U$`u:var`$X1, env) +
rep(mod1[[c]]$U$`u:env`$X1, each = n) +
mod1[[c]]$U$`u:env:var`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre1[c,1] = cor(BV[cv,1],BV[cv,2])^2
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}
colnames(pre1) = "GBLUP"8.2 2-MMIBLUP-gxe
t0=Sys.time()
print("============== GWAS + (GWAS-gxe) + MMIBLUP-gxe ===============")
mod2 = list()
pre2 = matrix(NA, count, 2)
pwr_g = c()
pwr_ge = c()
# Additive relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
E = diag(length(unique(y$env)))
rownames(E) = colnames(E) = unique(y$env)
EA = kronecker(E, A, make.dimnames = TRUE)
for(c in 1:count)
{
#############################################################
# Form data.frame
dt = y[,c(c,count+2, count+2, count+3)] # Trait c, var, var.1, and env
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
##############################################################
# GWAS
mod0 = GWAS(X1~1,
random = ~ vs(var, Gu=A) + vs(env, Gu=E) + vs(ds(env), var, Gu=A),
rcov = ~units,
data = dt,
M = Ga,
gTerm = "u:var",
verbose = FALSE, date.warning = FALSE)
site_qtl = which(mod0$scores>-log10(0.05/m) & mod0$scores != Inf) # bonferroni
# print(site_qtl)
m_qtl = length(site_qtl)
pwr_g = c(pwr_g, round(length(which(site_qtl %in% site_l))/m_l,3)) # modified
###############################################################
# GWASgxe
site_env_qtl = c()
for(e in 1:3)
{
gterm = paste0(unique(y$env)[e],":var")
mod0 = GWAS(X1~1,
random = ~ vs(var, Gu=A) + vs(env, Gu=E) + vs(ds(env), var, Gu=A),
rcov = ~ units,
data = dt,
M = Ga,
gTerm = gterm,
verbose = FALSE, date.warning = FALSE)
site_env_qtl = c(site_env_qtl,which(mod0$scores>-log10(0.05/m) & mod0$scores != Inf)) # bonferroni
}
site_env_qtl = unique(site_env_qtl)
# Mgxe = kronecker(E, Ga, make.dimnames = T)
# mod0 = GWAS(X1~1,
# random = ~ vs(var, Gu=A) + vs(env, Gu=E) + vs(env:var, Gu=EA),
# rcov = ~ units,
# data = dt,
# M = Mgxe,
# gTerm = "u:env:var",
# verbose = FALSE, date.warning = FALSE)
# site_env_qtl = which(mod0$scores>-log10(0.05/m)) # modified m to m*env
# site_env_qtl = unique(ceiling(site_env_qtl/env))
# print(site_env_qtl)
m_env_qtl = length(site_env_qtl)
pwr_ge = c(pwr_ge, round(length(which(site_env_qtl %in% site_l_env))/m_l_env,3))
###############################################################
# reform variance matrix with GWAS-QTL (Ka)
# include major QTL in data.frame as fixed effects
if(m_qtl>0) {
# adjusted additive relationship matrix
Ka = A.mat(Ga[,-site_qtl])
colnames(Ka) = rownames(Ka) = rownames(Ga)
# include major effect SNP in data.frame dt
dt = cbind(dt, Ga[,site_qtl])
xname = colnames(dt)[5:ncol(dt)] = paste0("F",1:m_qtl)
# prepare fixed effect formula for sommer
fmla = reformulate(xname, "X1")
# for MMIBayesBA
Xfix = dt[,-c(1:4)]
X = kronecker(rep(1,env), Ga[,-site_qtl])
rownames(X) = rep(rownames(Ga),env)
colnames(X) = colnames(Ga[,-site_qtl])
} else {
Ka = A
fmla = reformulate("1","X1")
# for MMIBayesBA
Xfix = rep(1, nrow(y))
X = kronecker(rep(1,env), Ga)
rownames(X) = rep(rownames(Ga),env)
colnames(X) = colnames(Ga)
}
###############################################################
# reform variance matrix with GWAS-gxe-QTL
if(m_env_qtl>0){
# for MMBayesA
XE = kronecker(diag(1,env), Ga[,site_env_qtl])
rownames(XE) = rep(rownames(Ga),env)
colnames(XE) = paste0(rep(colnames(Ga[,site_env_qtl]),env),":env",rep(1:env,each=m_env_qtl))
# for MMBLUP
if(m_env_qtl==1)
{
tmp = Ga[,site_env_qtl] %*% t(Ga[,site_env_qtl])
d = mean(diag(tmp))
Ka_l_env = tmp / d
}else{
Ka_l_env = A.mat(Ga[,site_env_qtl])
}
Ka_s_env = A.mat(Ga[,-site_env_qtl])
colnames(Ka_l_env) = rownames(Ka_l_env) = rownames(Ga)
colnames(Ka_s_env) = rownames(Ka_s_env) = rownames(Ga)
KaE_l = kronecker(E, Ka_l_env, make.dimnames = TRUE)
KaE_s = kronecker(E, Ka_s_env, make.dimnames = TRUE)
}else{
XE = rep(1,nrow(dt))
}
###############################################################
# Full Model (y = miu + Xb(QTL) + Zu_g(-QTL) + Zu_e + Zu_ge(gxeQTL) + Zu_ge(-gxeQTL) + e)
if(m_env_qtl>0){
mod2[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(env, Gu=E) + vs(env:var, Gu=KaE_l) ,
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
fix = data.frame(rep(cbind(rep(1,n), Ga[,site_qtl]) %*% mod2[[c]]$Beta$Estimate, env),
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),]+
rep(mod2[[c]]$U$`u:var`$X1, env) +
rep(mod2[[c]]$U$`u:env`$X1, each = n) +
mod2[[c]]$U$`u:env:var`$X1 ,
y[order(y$env,y$var),c(c,count+1)])
pre2[c,1] = cor(BV[cv,1],BV[cv,2])^2
}else{
mod2[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(env, Gu=E)+ vs(env:var, Gu=EA),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
fix = data.frame(rep(cbind(rep(1,n), Ga[,site_qtl]) %*% mod2[[c]]$Beta$Estimate, env),
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),]+
rep(mod2[[c]]$U$`u:var`$X1, env) +
rep(mod2[[c]]$U$`u:env`$X1, each = n) +
mod2[[c]]$U$`u:env:var`$X1 ,
y[order(y$env,y$var),c(c,count+1)])
pre2[c,1] = cor(BV[cv,1],BV[cv,2])^2
}
###############################################################
# MMIBayesBA
nIter=6000; burnIn=1000
fmMM = BGLR(y=dt[,1],ETA=list(list(X=Xfix, model="FIXED"), list(X=X, model="BayesB"), list(X=XE, model="BayesA")),
nIter=nIter,burnIn=burnIn, verbose = F)
yhat=matrix(fmMM$yHat, length(fmMM$yHat),1)
rownames(yhat) = rownames(y)
pre2[c,2] = cor(y[cv,c], yhat[cv,1])^2
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}
colnames(pre2) = c("MMIBLUP","MMIBayesBA")8.3 3-Bayes-gxe
t0=Sys.time()
print("============== Bayes Alphabet ===============")
pre3 = matrix(NA, count, 1)
for(c in 1:count)
{
y0 = y[,c]
cv = sample(1:length(y0), round(length(y0)/5))
y0[cv] = NA
X = cbind(kronecker(rep(1,env), Ga),kronecker(diag(1,env), Ga))
nIter=6000; burnIn=1000
fmBA=BGLR(y=y0,ETA=list( list(X=X,model='BayesA')),
nIter=nIter,burnIn=burnIn, verbose = F)
pre3[c,1]=cor(y[cv,c], fmBA$yHat[cv])^2
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}
colnames(pre3) = c("BayesB")9 (ADE) Models
9.1 1-GBLUP-ade
t0=Sys.time()
print("============== GBLUP-ade ===============")
# relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
D = D.mat(Ga)
colnames(D) = rownames(D) = rownames(Ga)
E = diag(length(unique(y$env)))
rownames(E) = colnames(E) = unique(y$env)
EA = kronecker(E,A, make.dimnames = TRUE)
ED = kronecker(E,D, make.dimnames = TRUE)
mod1 = list()
pre1 = c()
for(c in 1:count)
{
dt = y[,c(c,count+2,count+2, count+3)]
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
# Full Model1 (y = miu + Zu_a + Zu_d + Zu_e + Zu_ae + Zu_de + e)
mod1[[c]] = mmer(X1~1,
random = ~vs(var, Gu=A) + vs(var.1, Gu=D) + vs(env, Gu=E) + vs(env:var, Gu=EA) + vs(env:var.1, Gu=ED),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
# Breeding value
BV = data.frame(rep(mod1[[c]]$Beta$Estimate, n*env) +
rep(mod1[[c]]$U$`u:var`$X1, env) +
rep(mod1[[c]]$U$`u:var.1`$X1, env) +
rep(mod1[[c]]$U$`u:env`$X1, each = n) +
mod1[[c]]$U$`u:env:var`$X1+
mod1[[c]]$U$`u:env:var.1`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre = cor(BV[cv,1],BV[cv,2])^2
pre1 = c(pre1, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}9.2 2-MMIBLUP-ade
t0=Sys.time()
print("============== GWAS + (GWAS-gxe) + MMIBLUP-ade ===============")
# relationship matrix
A = A.mat(Ga)
colnames(A) = rownames(A) = rownames(Ga)
D = D.mat(Ga)
colnames(D) = rownames(D) = rownames(Ga)
E = diag(length(unique(y$env)))
rownames(E) = colnames(E) = unique(y$env)
EA = kronecker(E,A, make.dimnames = TRUE)
ED = kronecker(E,D, make.dimnames = TRUE)
mod2 = list()
pre2 = c()
mod3 = list()
pre3 = c()
pwr_g = c()
pwr_ge = c()
for(c in 1:count)
{
#############################################################
# Form data.frame
dt = y[,c(c,count+2,count+2,count+3)] # Trait c, var, var.1, and env
colnames(dt)[1] = "X1"
# sampling training population
cv = sample(rownames(dt), round(nrow(dt)/5))
dt[cv, 1] = NA
##############################################################
# GWAS
mod0 = GWAS(X1~1,
random = ~vs(var, Gu=A) + vs(var.1, Gu=D) + vs(env, Gu=E) + vs(env:var, Gu=EA) + vs(env:var.1, Gu=ED),
rcov = ~units,
data = dt,
M = Ga,
gTerm = "u:var",
verbose = FALSE, date.warning = FALSE)
site_qtl = which(mod0$scores>-log10(0.05/m)) # bonferroni
print(site_qtl)
m_qtl = length(site_qtl)
pwr_g = c(pwr_g, round(length(which(site_qtl %in% site_l))/m_l,3))
###############################################################
# GWAS for gxe interaction
site_env_qtl = c()
for(e in 1:env)
{
gterm = paste0(unique(y$env)[e],":var")
mod0 = GWAS(X1~1,
random = ~ vs(var, Gu=A) + vs(var.1, Gu=D) + vs(env, Gu=E) + vs(ds(env),var, Gu=A) + vs(env:var.1, Gu=ED),
rcov = ~ units,
data = dt,
M = Ga,
gTerm = gterm,
verbose = FALSE, date.warning = FALSE)
site_env_qtl = c(site_env_qtl,which(mod0$scores>-log10(0.05/m))) # bonferroni
}
site_env_qtl = unique(site_env_qtl)
print(site_env_qtl)
m_env_qtl = length(site_env_qtl)
pwr_ge = c(pwr_ge, round(length(which(site_env_qtl %in% site_l_env))/m_l_env,3))
###############################################################
# reform variance matrix with GWAS-QTL (Ka)
# include major QTL in data.frame as fixed effects
if(m_qtl>0) {
# adjusted relationship matrix
Ka = A.mat(Ga[,-site_qtl])
colnames(Ka) = rownames(Ka) = rownames(Ga)
Kd = D.mat(Ga[,-site_qtl])
colnames(Kd) = rownames(Kd) = rownames(Ga)
# include major effect SNP in data.frame dt
dt = cbind(dt, Ga[,site_qtl], Gd[,site_qtl])
xname = colnames(dt)[5:ncol(dt)] = paste0("F",1:(2*m_qtl))
# prepare fixed effect formula for sommer
fmla = reformulate(xname, "X1")
} else {
Ka = A
Kd = D
fmla = reformulate("1","X1")
}
###############################################################
# reform variance matrix with GWAS-gxe-QTL (EKa.m)
if(m_env_qtl>0){
if(m_env_qtl==1)
{
tmp = Ga[,site_env_qtl] %*% t(Ga[,site_env_qtl])
d = mean(diag(tmp))
Ka.m = tmp / d
tmp = Gd[,site_env_qtl] %*% t(Gd[,site_env_qtl])
d = mean(diag(tmp))
Kd.m = tmp / d
}else{
Ka.m = A.mat(Ga[,site_env_qtl])
Kd.m = D.mat(Ga[,site_env_qtl])
}
colnames(Ka.m) = rownames(Ka.m) = rownames(Ga)
colnames(Kd.m) = rownames(Kd.m) = rownames(Ga)
EKa.m = kronecker(E,Ka.m, make.dimnames = TRUE)
EKd.m = kronecker(E,Kd.m, make.dimnames = TRUE)
}
else{
EKa.m = EA
EKd.m = ED
}
###############################################################
# Full Model2 (y = miu + Xb(QTL) + Zu_g(-QTL) + Zu_e + Zu_ge + e)
mod2[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka) + vs(var.1, Gu=Kd) + vs(env, Gu=E) + vs(env:var, Gu=EA) + vs(env:var.1, Gu=ED),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
fix = data.frame(rep(cbind(rep(1,n), Ga[,site_qtl], Gd[, site_qtl]) %*% mod2[[c]]$Beta$Estimate, env),
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),] +
rep(mod2[[c]]$U$`u:var`$X1, env) +
rep(mod2[[c]]$U$`u:var.1`$X1, env) +
rep(mod2[[c]]$U$`u:env`$X1, each = n) +
mod2[[c]]$U$`u:env:var`$X1 +
mod2[[c]]$U$`u:env:var.1`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre = cor(BV[cv,1],BV[cv,2])^2
pre2 = c(pre2, pre)
###############################################################
# Full Model3 (y = miu + Xb(QTL) + Zu_g(-QTL) + Zu_e + Zu_ge(gxe-QTL) + e)
mod3[[c]] = mmer(fmla,
random = ~vs(var, Gu=Ka)+ vs(var.1, Gu=Kd) + vs(env, Gu=E) + vs(env:var, Gu=EKa.m) + vs(env:var.1, Gu=EKd.m),
rcov = ~units,
data = dt,
verbose = FALSE, date.warning = FALSE)
fix = data.frame(rep(cbind(rep(1,n), Ga[,site_qtl], Gd[,site_qtl]) %*% mod3[[c]]$Beta$Estimate, env),
row.names = rownames(dt))
BV = data.frame(fix[order(row.names(fix)),]+
rep(mod3[[c]]$U$`u:var`$X1, env) +
rep(mod3[[c]]$U$`u:var.1`$X1, env) +
rep(mod3[[c]]$U$`u:env`$X1, each = n) +
mod3[[c]]$U$`u:env:var`$X1 +
mod3[[c]]$U$`u:env:var.1`$X1,
y[order(y$env,y$var),c(c,count+1)])
pre = cor(BV[cv,1],BV[cv,2])^2
pre3 = c(pre3, pre)
t1=Sys.time()
print(paste("Repeat Time:",c,"Collapse Time:"))
print(t1-t0)
}#shell
<!-- ADAA -->
#!/bin/bash
h2=(0.2 0.5 0.8)
m=3000
n=(500 1000 1500 2000 2500)
for i in ${h2[@]}
do
{
for j in ${n[@]}
do
{
nohup Rscript GSAddDomEpi.R $m $j $i > adaa.$m.$j.$i.log 2>&1
} &
done
}
done
<!-- AE -->
#!/bin/bash
h2=(0.2 0.5 0.8)
m=3000
n=(500 1000 1500)
for i in ${h2[@]}
do
{
for j in ${n[@]}
do
{
nohup Rscript GSAddDomEpi.R $m $j $i > adaa.$m.$j.$i.log 2>&1
} &
done
}
done#Plot ## Boxplot1
setwd("/Users/apple/Desktop/GS_R_code/1-Rdata")
## read .Rdata
count=20
df0 = data.frame()
for (s in 1:4){
for (h2 in c(0.2,0.5,0.8)){
f = paste0("A_S",s,"_h2_",h2,"_plus.RData")
load(file = f)
# row = dim(df)[1]
# df = data.frame(Predictability = c(df[,1],df[,2]), Scenario = s, h2 = h2,
# Model = c(rep("GBLUP-adaa", row),rep("MMIBLUP-adaa", row)))
# df.p = data.frame(Power=c(rep(NA,row),df[,4],df[,5]), Scenario = s, h2 = h2,
# Model = c(rep("GBLUPgxe", row),rep("GWAS", row),rep("GWASgxe", row)))
df = data.frame(Rsq = as.vector(as.matrix(df)), Scenario = s, h2 = as.factor(h2),
Model = rep(colnames(df), each=count))
df0 = rbind(df0, df)
}
}
df0$h2 = as.factor(df0$h2)
df0$Scenario = as.factor(df0$Scenario)
df0$Model[which(df0$Model=="MMIBLUP")] = "mmGBLUP"
df0$Model[which(df0$Model=="MMIBayesA")] = "mmBayesA"
df0$Model[which(df0$Model=="MMIBayesB")] = "mmBayesB"
df0$Model = factor(df0$Model,
levels = c("GBLUP", "mmGBLUP", "BayesRR", "BayesC" ,"BayesL", "BayesA", "BayesB",
"mmBayesA", "mmBayesB"),
ordered = TRUE)
# by scenario
ggplot(data = df0, aes(x = h2, y = Rsq, fill = Model)) +
geom_boxplot() +
scale_y_continuous(name = "Accuracy", limits = c(0,1)) +
scale_x_discrete(name = expression(h^{2})) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet() +
facet_wrap(~Scenario, ncol = 2,labeller = label_both)
ggsave(p, filename = "Simulation.tiff",width = 10, height = 10)
# by h2
p = ggplot(data = df0, aes(x = Scenario, y = Predictability, fill = Model)) +
geom_boxplot() +
scale_y_continuous(name = "Predictability", limits = c(0,1)) +
scale_x_discrete(name = "Scenario") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet() +
facet_wrap(~h2, labeller = label_both)
ggsave(p, filename = "Fig2-Simu-AD.tiff",width = 15, height = 5)
# power
p = ggplot(data = df0, aes(x = Scenario, y = Power, fill = Model)) +
geom_boxplot() +
scale_y_continuous(name = "Power", limits = c(0,1)) +
scale_x_discrete(name = "Simulated variance for large gxe effects") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet() +
facet_wrap(~h2, labeller = label_both)
ggsave(p, filename = "Fig2-Simu-gxe-power.tiff",width = 15, height = 5)9.3 Boxplot2
- a demo
df0 = data.frame()
df = data.frame(pre3, pre1, pre2)
df = data.frame(Rsq = as.vector(as.matrix(df)), Scenario = s, h2 = as.factor(h2),
Model = rep(colnames(df), each=count))
df0 = rbind(df0, df)
df0$h2 = as.factor(df0$h2)
df0$Scenario = as.factor(df0$Scenario)
ggplot(data = df0, aes(x = h2, y = Rsq, fill = Model)) +
geom_boxplot() +
scale_y_continuous(name = "Predictability", limits = c(0,1)) +
scale_x_discrete(name = "h2") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet() +
facet_wrap(~Scenario, labeller = label_both, ncol = 2)
ggplot(data = df0, aes(x = Scenario, y = Rsq, fill = Model)) +
geom_boxplot() +
scale_y_continuous(name = "Predictability", limits = c(0,1)) +
scale_x_discrete(name = "Scenario") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet() +
facet_wrap(~h2, labeller = label_both, ncol = 4)9.4 AE-Boxplot-acc-tim-pwr
###################### Plot ########################
setwd("/Users/apple/Desktop/GS_R_code/1-Rdata")
## read .Rdata
m=300
count=20
acc0 = data.frame()
tim0 = data.frame()
pwr0 = data.frame()
for (n in c(100,200,300)){
for (h2 in c(0.2,0.5,0.8)){
f = paste0("AddEnv_m_",m,"_n_",n,"_h2_",h2,".RData")
load(file = f)
acc0 = rbind(acc0, acc)
tim0 = rbind(tim0, tim)
pwr0 = rbind(pwr0, pwr)
}
}
acc0$model = as.character(acc0$model)
tim0$model = as.character(tim0$model)
acc0$model[which(acc0$model=="GBLUP-gxe")] = "GBLUP-ae"
tim0$model[which(tim0$model=="GBLUP-gxe")] = "GBLUP-ae"
acc0$model[which(acc0$model=="MMIBLUP-gxe")] = "mmGBLUP-ae"
tim0$model[which(tim0$model=="MMIBLUP-gxe")] = "mmGBLUP-ae"
acc0$model[which(acc0$model=="BayesB-gxe")] = "BayesB-ae"
tim0$model[which(tim0$model=="BayesB-gxe")] = "BayesB-ae"
acc0$model[which(acc0$model=="MMIBayesB-gxe")] = "mmBayesB-ae"
tim0$model[which(tim0$model=="MMIBayesB-gxe")] = "mmBayesB-ae"
# AE
acc0$model = factor(acc0$model,
levels = c("GBLUP-ae", "mmGBLUP-ae", "BayesB-ae", "mmBayesB-ae"),
ordered = TRUE)
tim0$model = factor(tim0$model,
levels = c("GBLUP-ae", "mmGBLUP-ae", "BayesB-ae", "mmBayesB-ae"),
ordered = TRUE)
## 1-acc
ggplot(data = acc0, aes(x = h2, y = acc, fill = model)) +
geom_boxplot() +
scale_y_continuous(name = "Accuracy", limits = c(0,1)) +
scale_x_discrete(name = "Heritability") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet()+
facet_wrap(~n, labeller = label_both)
## 2-tim
ggplot(data = tim0, aes(x = n, y = time, group = model, color = model)) +
geom_point() +
geom_line() +
scale_y_continuous(name = "CPU Time (secs)") +
scale_x_discrete(name = "Training Pop Size") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_color_lancet()+
facet_wrap(~h2, labeller = label_both)
## 3-pwr
ggplot(data = pwr0, aes(x = h2, y = pwr, color = h2)) +
geom_jitter(width = 0.1) +
scale_y_continuous(name = "Power", limits = c(0,1)) +
scale_x_discrete(name = "model") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_color_futurama() +
facet_grid(model~n, labeller = label_both)9.5 ADAA-Boxplot-acc-tim-pwr
###################### Plot ########################
setwd("/Users/apple/Desktop/GS_R_code/1-Rdata")
## read .Rdata
m=3000
count=20
acc0 = data.frame()
tim0 = data.frame()
pwr0 = data.frame()
for (n in seq(500, 2500, 500) ){
for (h2 in c(0.2,0.5,0.8)){
f = paste0("ADAAnew_m_",m,"_n_",n,"_h2_",h2,".RData")
load(file = f)
acc0 = rbind(acc0, acc)
tim0 = rbind(tim0, tim)
pwr0 = rbind(pwr0, pwr)
}
}
acc0$model = as.character(acc0$model)
tim0$model = as.character(tim0$model)
acc0$model[which(acc0$model=="MMIBLUP-adaa")] = "mmGBLUP-adaa"
tim0$model[which(tim0$model=="MMIBLUP-adaa")] = "mmGBLUP-adaa"
acc0$model[which(acc0$model=="MMIBayesB-adaa")] = "mmBayesB-adaa"
tim0$model[which(tim0$model=="MMIBayesB-adaa")] = "mmBayesB-adaa"
# ADAA
acc0$model = factor(acc0$model,
levels = c("GBLUP-adaa", "mmGBLUP-adaa", "BayesB-adaa", "mmBayesB-adaa"),
ordered = TRUE)
tim0$model = factor(tim0$model,
levels = c("GBLUP-adaa", "mmGBLUP-adaa", "BayesB-adaa", "mmBayesB-adaa"),
ordered = TRUE)
## 1-acc
ggplot(data = acc0, aes(x = h2, y = acc, fill = model)) +
geom_boxplot() +
scale_y_continuous(name = "Accuracy", limits = c(0,1)) +
scale_x_discrete(name = "Heritability") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_fill_lancet()+
facet_wrap(~n, labeller = label_both)
## 2-tim
ggplot(data = tim0, aes(x = n, y = time, group = model, color = model)) +
geom_point() +
geom_line() +
scale_y_continuous(name = "CPU Time (secs)") +
scale_x_discrete(name = "Training Pop Size") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_color_lancet()+
facet_wrap(~h2, labeller = label_both)
## 3-pwr
ggplot(data = pwr0, aes(x = h2, y = pwr, color = h2, fill = h2)) +
geom_boxplot(width = 0.5) +
scale_y_continuous(name = "Power", limits = c(-0.05,0.4)) +
scale_x_discrete(name = "Heritability") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"), axis.title = element_text(face = "bold")) +
scale_color_aaas() +
scale_fill_aaas(alpha = 0.5) +
facet_grid(model~n, labeller = label_both)10 Me calculation
srm = t(Ga) %*% Ga / n # snp relationship matrix
grm = Ga %*% t(Ga) / m # genetic relationship matrix
# load("E:/ZQX/GS/Simu.RData")
# snp_cor_mtr = cor2(data, nproc = 15) # lower-diagonal 3000*2999/2
# ind_cor_mtr = cor2(as.data.frame(t(data)), nproc = 15) # lower-diagonal 2000*1999/2
me1 = m*m/sum(srm^2)
#me1 = m*m/(2*sum(snp_cor_mtr^2)+m)
E_r1 = h2/(1 + me1/(n*h2))
me2 = 1/var(grm[lower.tri(grm)])
#me2 = 1/var(ind_cor_mtr)
E_r2 = h2/(1 + me2/(n*h2))