数据
表型数据
来源于两个部分:
- 常规表型:来自版本号为UKB40268 (获取时间应该是2020年,比较老)的81个重要的数量性状,与齐国安计算遗传力的性状保持一致
## # A tibble: 6 × 2
## Field_ID Field
## <chr> <chr>
## 1 21021 Pulse wave Arterial Stiffness index
## 2 23099 Body fat percentage
## 3 23102 Whole body water mass
## 4 23106 Impedance of whole body
## 5 23107 Impedance of leg (right)
## 6 23108 Impedance of leg (left)
- 代谢表型:来自版本号为UKB674363 (获取时间应该是2023年,比较新)的251个代谢性状,与钱雨做代谢UKC的性状保持一致
met_info<-read.table("/disk195/zz/HE_GE/miss_number_metablism.txt",h=T,sep="\t")
head(met_info[,2,drop=FALSE])## V2
## 1 3-Hydroxybutyrate
## 2 Acetate
## 3 Acetoacetate
## 4 Acetone
## 5 Alanine
## 6 Albumin
基因型数据
也分为两个部分: + UKB中的Unrelated British样本,个体数为292223,SNP数量为784256
urltd_fam<-read.table("/disk201/zzy/1_GenomicSelection/2-Phenotype/4.UKB/ukb/british/unrelated_british/urltd_British.fam",h=F)
dim(urltd_fam)## [1] 292223 6
urltd_bim<-fread("/disk201/zzy/1_GenomicSelection/2-Phenotype/4.UKB/ukb/british/unrelated_british/urltd_British.bim",h=F)
dim(urltd_bim)## [1] 784256 6
- UKB中的Sib pair样本,来自家系18281,个体数为36562,位点数为784256
sib_fam<-read.table("/disk201/zzy/1_GenomicSelection/2-Phenotype/4.UKB/ukb/british/ukbSib/British.2fullSib.newFID.fam",h=F)
data.frame(Family=length(unique(sib_fam$V1)),
Ind=length(unique(sib_fam$V2)))## Family Ind
## 1 18281 36562
sib_bim<-read.table("/disk201/zzy/1_GenomicSelection/2-Phenotype/4.UKB/ukb/british/ukbSib/British.2fullSib.newFID.bim",h=F)
dim(urltd_bim)## [1] 784256 6
比较奇怪的是Unrelated个体与Sib个体之间存在53个overlap
## [1] 53
分析内容
基于Sib数据的基因型计算位点层面的IBS
程序为/disk195/zz/HE_GE/01_snp-wise_ibs.R,对于某一个位点\(k\),同一家系的两个个体\(i\)与个体\(j\)之间的IBS计算公式为 \[
\text{IBS}_{ijk} = \frac{(x_{ik}-2p_i)(x_{jk}-2p_i)}{2p_i(1-p_i)}
\tag{1}
\]
根据上式,首先根据Sib群体的基因型计算每个位点的等位基因频率,然后针对每个家系分别计算Sib之间的所有位点的IBS,核心代码如下:
# 先用plink的freqx命令计算基因型频率,之后用公式p=A+1/2H计算等位基因频率
cmd<-str_glue("{plink} --bfile {gen_dir}/British.2fullSib.newFID --freqx --out {out_dir}/sib")
system(cmd)
frq<-fread(str_glue("{out_dir}/sib.frqx"))
p<-frq$`C(HOM A2)`/(frq$`C(HOM A1)`+frq$`C(HET)`+frq$`C(HOM A2)`) +
0.5*(frq$`C(HET)`/(frq$`C(HOM A1)`+frq$`C(HET)`+frq$`C(HOM A2)`))
# 之后拆分每个家系,并用下面的命令计算计算位点层面的IBS
worker<-function(famid,out_dir,plink2,gen_dir){
#famid<-1
out_dir2<-str_glue("{out_dir}/ibs/fam{famid}")
dir.create(out_dir2,showWarnings = F,recursive = T)
write.table(famid,file=str_glue("{out_dir2}/ind"),
row.names=F,col.names = F,quote=F)
cmd<-str_glue("{plink2} --bfile {gen_dir}/British.2fullSib.newFID --keep-fam {out_dir2}/ind --export vcf 'vcf-dosage=DS-only' --out {out_dir2}/geno")
system(cmd)
geno<-fread(str_glue("{out_dir2}/geno.vcf"))
a<-2-as.numeric(geno[[names(geno)[10]]])
b<-2-as.numeric(geno[[names(geno)[11]]])
pq<-2*p*(1-p)
ibs<-data.frame(IBS=(a-2*p)*(b-2*p)/pq)
fwrite(ibs,file=str_glue("{out_dir2}/ibs"),row.names=F,
col.names=F,quote=F,sep="\t",na="NA")
}当然,对每对sib-pair的所有位点的IBS取均值就可以得到两个个体之间的平均IBS,整个sib群体的平均IBS分布如下
knitr::opts_chunk$set(dpi=300)
ibs_mean<-read.table("/disk224/zz/HE_GE/ukb_sib_geno/ibs_mean.txt",h=T)
male_id<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/ysq_male.txt",h=T,check.names = F)[,1]
female_id<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/ysq_female.txt",h=T,check.names = F)[,1]
mix_id<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/ysq_mix.txt",h=T,check.names = F)[,1]
add_id<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/ysq_add.txt",h=T,check.names = F)[,1]
p1<-ggplot(ibs_mean, aes(x=IBS))+
geom_density(color="darkblue", fill="lightblue")+
geom_vline(aes(xintercept=mean(IBS)),
color="blue", linetype="dashed", linewidth=1)+
theme_classic() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#legend.title = element_blank(),
plot.background = element_rect(fill ='white', color ='white'),
# axis.text.x = element_blank(),
axis.text.y = element_text(size=10,color="black"),
axis.text.x = element_text(size=10,color="black"),
text = element_text(size=10,color="black"),
legend.position = "bottom"
)+ggtitle("All")
p2<-ggplot(ibs_mean[ibs_mean$FAM%in%male_id,], aes(x=IBS))+
geom_density(color="darkblue", fill="lightblue")+
geom_vline(aes(xintercept=mean(IBS)),
color="blue", linetype="dashed", linewidth=1)+
theme_classic() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#legend.title = element_blank(),
plot.background = element_rect(fill ='white', color ='white'),
# axis.text.x = element_blank(),
axis.text.y = element_text(size=10,color="black"),
axis.text.x = element_text(size=10,color="black"),
text = element_text(size=10,color="black"),
legend.position = "bottom"
)+ggtitle("Brothers")
p3<-ggplot(ibs_mean[ibs_mean$FAM%in%female_id,], aes(x=IBS))+
geom_density(color="darkblue", fill="lightblue")+
geom_vline(aes(xintercept=mean(IBS)),
color="blue", linetype="dashed", linewidth=1)+
theme_classic() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#legend.title = element_blank(),
plot.background = element_rect(fill ='white', color ='white'),
# axis.text.x = element_blank(),
axis.text.y = element_text(size=10,color="black"),
axis.text.x = element_text(size=10,color="black"),
text = element_text(size=10,color="black"),
legend.position = "bottom"
)+ggtitle("Sisters")
p4<-ggplot(ibs_mean[ibs_mean$FAM%in%mix_id,], aes(x=IBS))+
geom_density(color="darkblue", fill="lightblue")+
geom_vline(aes(xintercept=mean(IBS)),
color="blue", linetype="dashed", linewidth=1)+
theme_classic() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#legend.title = element_blank(),
plot.background = element_rect(fill ='white', color ='white'),
# axis.text.x = element_blank(),
axis.text.y = element_text(size=10,color="black"),
axis.text.x = element_text(size=10,color="black"),
text = element_text(size=10,color="black"),
legend.position = "bottom"
)+ggtitle("Mix")
p5<-ggplot(ibs_mean[ibs_mean$FAM%in%add_id,], aes(x=IBS))+
geom_density(color="darkblue", fill="lightblue")+
geom_vline(aes(xintercept=mean(IBS)),
color="blue", linetype="dashed", linewidth=1)+
theme_classic() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#legend.title = element_blank(),
plot.background = element_rect(fill ='white', color ='white'),
# axis.text.x = element_blank(),
axis.text.y = element_text(size=10,color="black"),
axis.text.x = element_text(size=10,color="black"),
text = element_text(size=10,color="black"),
legend.position = "bottom"
)+ggtitle("Brothers+Sisters")
ggarrange(p1,p2,p3,p4,p5,nrow=3,ncol=2)计算IBS的方差,并取倒数:
- 所有家系:
## [1] 585.609
- 兄弟家系:
## [1] 630.4095
- 姐妹家系:
## [1] 573.5377
- 混合家系:
## [1] 577.3346
- 兄弟+姐妹家系:
## [1] 592.6263
基于sib的IBS,利用HE方法计算81个传统性状的遗传力
对于Sib的遗传力,首先利用Sib的表型信息校正掉三个主成分以及性别的固定效应,并对残差进行标准化,然后分别获取Sib-pair的性状差值的平方以及Sib-pair表型值的乘积两类表型,具体代码如下:
# 读入主成分、性别、家系等信息
gen_dir<-"/disk201/zzy/1_GenomicSelection/2-Phenotype/4.UKB/ukb/british/ukbSib"
fam<-read.table(str_glue("{gen_dir}/British.2fullSib.newFID.fam"),h=F)
pc_dir<-"/disk224/zz/HE_GE/reml_herit"
pcs<-read.table(str_glue("{pc_dir}/sib.eigenvec"),h=F)
dat<-cbind(fam[,1:5],pcs[,3:5])
colnames(dat)<-c("FAMID","ID","SIRE","DAM","SEX","PC1",
"PC2","PC3")
dat$SEX<-as.factor(dat$SEX)
# 利用LM校正掉固定效应的影响,然后进行标准化
y<-map_dfc(1:ncol(phe),~{
dat$TRT<-phe[,.x]
dat2<-dat[complete.cases(dat),]
if(length(unique(dat2$SEX))<2){
mod<-lm(TRT~PC1+PC2+PC3-1,data=dat)
}else{
mod<-lm(TRT~SEX+PC1+PC2+PC3-1,data=dat)
}
resid<-residuals(mod)
resid<-resid[match(1:nrow(dat), names(resid))]
res<-data.frame(PHE=scale(resid))
colnames(res)<-colnames(phe)[.x]
return(res)
})
y<-cbind(dat[,1:2],y)
# 按照家系计算表性差方与表型乘积
y2<-y %>%
group_split(FAMID)
ysq<-map_dfr(y2,~{
tmp<-.x[,3:ncol(.x)]
res<-data.frame(FAMID=.x[1,1])
ysq<-(tmp[1,]-tmp[2,])^2
res<-cbind(res,ysq)
return(res)
})
yprod<-map_dfr(y2,~{
tmp<-.x[,3:ncol(.x)]
res<-data.frame(FAMID=.x[1,1])
ysq<-tmp[1,]*tmp[2,]
res<-cbind(res,ysq)
return(res)
})
# 输出到计算机
ibs_dir<-"/disk224/zz/HE_GE/ukb_sib_geno"
ibs_mean<-read.table(str_glue("{ibs_dir}/ibs_mean.txt"),h=T)
ysq<-ysq[match(ibs_mean$FAM,ysq$FAMID),]
yprod<-yprod[match(ibs_mean$FAM,yprod$FAMID),]
write.table(ysq,file=str_glue("{out_dir}/ysq.txt"),row.names = F,
col.names = F,quote = F,sep = "\t")
write.table(yprod,file=str_glue("{out_dir}/yprod.txt"),row.names = F,
col.names = F,quote = F,sep = "\t")之后使用线性模型计算遗传力,代码为:
knitr::opts_chunk$set(dpi=300)
# calculate heritability
ibs_dir<-"/disk224/zz/HE_GE/ukb_sib_geno"
ibs_mean<-read.table(str_glue("{ibs_dir}/ibs_mean.txt"),h=T)
ysq<-read.table("/disk224/zz/HE_GE/he_herit/ysq.txt",h=T,sep="\t",check.names = F)
yprod<-read.table("/disk224/zz/HE_GE/he_herit/yprod.txt",h=T,sep="\t",check.names = F)
h2_ysq<-map_dfr(2:ncol(ysq),function(i){
mod<-lm(ysq[,i]~ibs_mean$IBS)
res<-summary(mod)$coefficients
res<-data.frame(TRT=colnames(ysq)[i],
h2=-res[2,1]/2,se=res[2,2],pval=res[2,4])
})
h2_prod<-map_dfr(2:ncol(yprod),function(i){
mod<-lm(yprod[,i]~ibs_mean$IBS)
res<-summary(mod)$coefficients
res<-data.frame(TRT=colnames(yprod)[i],
h2=res[2,1],se=res[2,2],pval=res[2,4])
})再读取利用RHE计算的无关个体的遗传力,然后利用散点图进行对比
knitr::opts_chunk$set(dpi=300)
phe_info<-read_xlsx("/disk195/zz/HE_GE/VC.RHE.202406.split2.xlsx")
codes<-unique(phe_info$Field_ID)
# h2_rand_dir<-"/disk224/zz/HE_GE/he_herit_rand"
# h2_rand<-map_dfr(codes,~{
# h2<-read.table(str_glue("{h2_rand_dir}/{.x}/rand.heReg"),skip = 1,h=F,sep="\t",fill=T)
# return(data.frame(ID=.x,
# h2=as.numeric(str_match(h2[2,2],"\\) (.+)")[,2])
# ))
# })
h2_rand<-phe_info[phe_info$B==50,c("Field_ID","h2")]
colnames(h2_rand)<-c("ID","h2")
h2<-data.frame(TRT=h2_prod$TRT,
h2_prod=h2_prod$h2,
h2_ysq=h2_ysq$h2,
h2_rand=as.numeric(h2_rand$h2[match(h2_prod$TRT,h2_rand$ID)]))
h2$h2_prod<-as.numeric(h2$h2_prod)
h2$h2_ysq<-as.numeric(h2$h2_ysq)
h2$h2_rand<-as.numeric(h2$h2_rand)
# 有一个性状3872有点问题
# 画图
h2<-h2[complete.cases(h2),]
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Unrelated heritability")+
ylab("Sibs heritability (square)")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+xlab("Unrelated heritability")+
ylab("Sibs heritability (production)")+
theme_minimal()+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Could not calculate the predicate for layer 2; ignored
## label_key: Name
Corss-Production VS Square
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit/h2.txt",h=F)
colnames(h2)<-c("TRT","h2_prod","h2_ysq","h2_rand")
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
# 画图
ggplot(h2, aes(x=h2_prod, y=h2_ysq,color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Cross-Production")+
ylab("Square")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
把家系分为兄弟家庭、姐妹家庭、混合家庭
一共从家系中分出了3581个兄弟家庭、6441个姐妹家庭、8259个混合家庭。
先看兄弟的情况:
#兄弟
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/h2_male.txt",h=T)
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
h2_rand<-phe_info[phe_info$B==50,c("Field_ID","h2")]
colnames(h2_rand)<-c("ID","h2")
h2$h2_rand<-as.numeric(h2_rand$h2[match(h2$TRT,h2_rand$ID)])
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Unrelated heritability")+
ylab("Bros heritability (square)")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+xlab("Unrelated heritability")+
ylab("Bros heritability (production)")+
theme_minimal()+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Could not calculate the predicate for layer 2; ignored
## label_key: Name
## Warning: Removed 3 rows containing missing values (`geom_point()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).
再看姐妹的情况
#姐妹
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/h2_female.txt",h=T)
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
h2_rand<-phe_info[phe_info$B==50,c("Field_ID","h2")]
colnames(h2_rand)<-c("ID","h2")
h2$h2_rand<-as.numeric(h2_rand$h2[match(h2$TRT,h2_rand$ID)])
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Unrelated heritability")+
ylab("Sisters heritability (square)")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+xlab("Unrelated heritability")+
ylab("Sisters heritability (production)")+
theme_minimal()+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Could not calculate the predicate for layer 2; ignored
## label_key: Name
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
再看混合的情况
#混合家系
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/h2_mix.txt",h=T)
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
h2_rand<-phe_info[phe_info$B==50,c("Field_ID","h2")]
colnames(h2_rand)<-c("ID","h2")
h2$h2_rand<-as.numeric(h2_rand$h2[match(h2$TRT,h2_rand$ID)])
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Unrelated heritability")+
ylab("Mixed heritability (square)")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+xlab("Unrelated heritability")+
ylab("Mixed heritability (production)")+
theme_minimal()+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Could not calculate the predicate for layer 2; ignored
## label_key: Name
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
最后看单纯兄弟+姐妹的情况
#兄弟+姐妹
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit_sexGroup/h2_add.txt",h=T)
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
h2_rand<-phe_info[phe_info$B==50,c("Field_ID","h2")]
colnames(h2_rand)<-c("ID","h2")
h2$h2_rand<-as.numeric(h2_rand$h2[match(h2$TRT,h2_rand$ID)])
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Unrelated heritability")+
ylab("Brother+Sister heritability (square)")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod, color=Name)) +
geom_point(size=2)+
geom_abline(slope=1, intercept=0)+xlab("Unrelated heritability")+
ylab("Brother+Sister heritability (production)")+
theme_minimal()+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Could not calculate the predicate for layer 2; ignored
## label_key: Name
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
从每个家系中提取出一个个体,用RHE的方法计算81个传统性状的遗传力
从每个家系中随机选择一个个体,利用RHE计算遗传力,参数与前面保持一致,继续与利用IBS的Sib遗传力进行对比
knitr::opts_chunk$set(dpi=300)
h2_singleSib_rand_dir<-"/disk224/zz/HE_GE/he_herit_rand_singleSib"
# h2_rand_singleSib<-map_dfr(codes,~{
# h2<-read.table(str_glue("{h2_singleSib_rand_dir}/{.x}/rand.heReg"),skip = 1,h=F,sep="\t",fill=T)
# return(data.frame(ID=.x,
# h2=as.numeric(str_match(h2[2,2],"\\) (.+)")[,2])
# ))
# })
h2_rand_singleSib<-map_dfr(codes,~{
h2<-read.table(str_glue("{h2_singleSib_rand_dir}/{.x}/gcta.he.HEreg"),skip = 6,h=T,fill=T)
return(data.frame(ID=.x,
h2=h2[2,2]))
})
h2<-data.frame(TRT=h2_prod$TRT,
h2_prod=h2_prod$h2,
h2_ysq=h2_ysq$h2,
h2_rand=as.numeric(h2_rand_singleSib$h2[match(h2_prod$TRT,h2_rand_singleSib$ID)]))
h2$h2_prod<-as.numeric(h2$h2_prod)
h2$h2_ysq<-as.numeric(h2$h2_ysq)
h2$h2_rand<-as.numeric(h2$h2_rand)
h2$Name<-phe_size$Trait[match(h2$TRT,phe_size$Code)]
h2$Name[!h2$TRT%in%c(50,2976,21001)]<-"Other"
h2$Name[h2$TRT%in%c(50)]<-"Height"
h2$Name[h2$TRT%in%c(2976)]<-"AgeDiabetes"
h2$Name[h2$TRT%in%c(21001)]<-"BMI"
# 画图
h2<-h2[complete.cases(h2),]
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq,color=Name)) +
geom_point()+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("SingleSib heritability")+
ylab("Sibs heritability (square)")+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Warning: Could not calculate the predicate for layer 2; ignored
## label_key: Name
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod,color=Name)) +
geom_point()+
geom_abline(slope=1, intercept=0)+xlab("SingleSib heritability")+
ylab("Sibs heritability (production)")+
theme_minimal()+
gghighlight(Name!="Other")## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## Could not calculate the predicate for layer 2; ignored
## label_key: Name
基于sib的IBS,利用HE方法计算251个代谢性状的遗传力
代码在/disk195/zz/HE_GE/03_heritability_metab.R,模式与上面常规性状的处理较为类似,重点看作图部分
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit_metab/h2.txt",h=F)
colnames(h2)<-c("TRT","h2_prod","h2_ysq","h2_rand")
# 画图
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq)) +
geom_point()+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Unrelated heritability")+
ylab("Sibs heritability (square)")
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod)) +
geom_point()+
geom_abline(slope=1, intercept=0)+xlab("Unrelated heritability")+
ylab("Sibs heritability (production)")+
theme_minimal()
ggarrange(p1,p2,nrow=1)Corss-Production VS Square
h2<-read.table("/disk224/zz/HE_GE/he_herit_metab/h2.txt",h=F)
colnames(h2)<-c("TRT","h2_prod","h2_ysq","h2_rand")
# 画图
ggplot(h2, aes(x=h2_prod, y=h2_ysq)) +
geom_point()+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("Cross-Production")+
ylab("Square")从每个家系中提取出一个个体,用RHE的方法计算251个代谢性状的遗传力
代码与上面类似,重点看作图部分
knitr::opts_chunk$set(dpi=300)
h2<-read.table("/disk224/zz/HE_GE/he_herit_metab/h2_singleSib.txt",h=F)
colnames(h2)<-c("TRT","h2_prod","h2_ysq","h2_rand")
p1<-ggplot(h2, aes(x=h2_rand, y=h2_ysq)) +
geom_point()+
geom_abline(slope=1, intercept=0)+
theme_minimal()+xlab("SingleSib heritability")+
ylab("Sibs heritability (square)")
p2<-ggplot(h2, aes(x=h2_rand, y=h2_prod)) +
geom_point()+
geom_abline(slope=1, intercept=0)+xlab("SingleSib heritability")+
ylab("Sibs heritability (production)")+
theme_minimal()
ggarrange(p1,p2,nrow=1)