# Select Gene-SNP pair with minimum P value
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 366301 19.6 592000 31.7 460000 24.6
## Vcells 578185 4.5 1308461 10.0 786181 6.0
setwd("/Volumes/Transcend/Thesis_project/3.merge.liver.lung.cis.eqtl")
# load mouse lung cis eqtl result
lung.mouse.eQTL<-read.table(file="2015-12-04 mouselung.cis.1M.eqtls.txt", header=T)
# load mouse liver cis eqtl result
liver.mouse.eQTL<-read.table(file="2015-12-07 mouseliver.cis.1M.eqtls.txt", header=T)
mouse4302ensembl_id<-read.table(file="2015-12-04 mouse4302ensembl_id.txt", header=T)
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt", header=T)
# Add ensemble id annoatation to the data
lung.mouse.eQTL<-merge(lung.mouse.eQTL, mouse4302ensembl_id, by.x = "gene", by.y="probe_id")
liver.mouse.eQTL<-merge(liver.mouse.eQTL, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
head(lung.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13475374 1.0503967 0.2994035 0.5712395 0.06286667
## 2 1415670_at rs13478876 0.9133601 0.3661462 0.6356597 0.05583410
## 3 1415670_at rs3713705 1.1363521 0.2621028 0.5310916 0.06786667
## 4 1415670_at rs13478880 1.1363521 0.2621028 0.5310916 0.06786667
## 5 1415670_at gnf06.086.089 0.9133601 0.3661462 0.6356597 0.05583410
## 6 1415672_at rs13479651 -1.3052534 0.1987480 0.4510718 -0.03993582
## beta_se ensembl_id
## 1 0.05985040 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05972327 ENSMUSG00000030058
## 5 0.06113044 ENSMUSG00000030058
## 6 0.03059622 ENSMUSG00000015341
head(liver.mouse.eQTL)
## gene snps statistic pvalue FDR beta
## 1 1415670_at rs13478876 0.3577236 0.7232327 0.9060716 0.022464286
## 2 1415670_at rs13475374 0.1359233 0.8928544 0.9654697 0.008533333
## 3 1415670_at gnf06.086.089 0.3577236 0.7232327 0.9060716 0.022464286
## 4 1415670_at rs13478880 0.3577236 0.7232327 0.9060716 0.022464286
## 5 1415670_at rs3713705 0.3577236 0.7232327 0.9060716 0.022464286
## 6 1415671_at rs3675894 1.0520305 0.3017805 0.6480655 0.038909091
## beta_se ensembl_id
## 1 0.06279788 ENSMUSG00000030058
## 2 0.06278048 ENSMUSG00000030058
## 3 0.06279788 ENSMUSG00000030058
## 4 0.06279788 ENSMUSG00000030058
## 5 0.06279788 ENSMUSG00000030058
## 6 0.03698476 ENSMUSG00000013160
library(data.table)
# Select Gene-SNP pair with minimum P value
lung.mouse.eQTL.min <- data.table(lung.mouse.eQTL, key=c('ensembl_id', "pvalue"))
lung.mouse.eQTL.min<-lung.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
lung.mouse.eQTL.min<-as.data.frame(lung.mouse.eQTL.min)
liver.mouse.eQTL.min <- data.table(liver.mouse.eQTL, key=c('ensembl_id', "pvalue"))
liver.mouse.eQTL.min<-liver.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
liver.mouse.eQTL.min<-as.data.frame(liver.mouse.eQTL.min)
library(plyr)
lung.mouse.eQTL.min<-rename(lung.mouse.eQTL.min, c("pvalue"="lung_pvalue", "beta"="lung.beta", "beta_se"="lung.beta_se"))
liver.mouse.eQTL.min<-rename(liver.mouse.eQTL.min, c("pvalue"="liver_pvalue", "beta"="liver.beta", "beta_se"="liver.beta_se"))
head(lung.mouse.eQTL.min)
## gene snps statistic lung_pvalue FDR
## 1 1428645_at rs3702359 -1.4324741 0.1592383 0.3935788
## 2 1421514_a_at gnfX.141.820 -0.1728806 0.8635565 0.9457289
## 3 1416677_at rs3720981 -0.1308388 0.8965131 0.9603432
## 4 1451677_at CEL-11_120628029 0.4574870 0.6496243 0.8393760
## 5 1425955_at rs13478643 -1.0185410 0.3141176 0.5866782
## 6 1426241_a_at rs4224744 0.9192838 0.3630755 0.6327046
## lung.beta lung.beta_se ensembl_id
## 1 -0.059800000 0.04174596 ENSMUSG00000000001
## 2 -0.005911765 0.03419566 ENSMUSG00000000037
## 3 -0.023660714 0.18083869 ENSMUSG00000000049
## 4 0.016446429 0.03594950 ENSMUSG00000000056
## 5 -0.078136752 0.07671439 ENSMUSG00000000058
## 6 0.052847059 0.05748721 ENSMUSG00000000085
head(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 1 1428645_at rs13477321 -0.6238563 5.377718e-01 0.8222409897
## 2 1449320_at CEL-X_71438949 1.3340954 1.929225e-01 0.5174275252
## 3 1421514_a_at CEL-X_154048891 -0.2835863 7.788138e-01 0.9261630010
## 4 1416677_at rs3670642 5.4086629 9.084753e-06 0.0001847233
## 5 1451678_at CEL-11_121219118 1.2354816 2.269175e-01 0.5622059072
## 6 1425955_at NAT_6_18.199327 -1.9727688 5.846922e-02 0.2581607284
## liver.beta liver.beta_se ensembl_id
## 1 -0.031972222 0.05124934 ENSMUSG00000000001
## 2 0.033761364 0.02530656 ENSMUSG00000000003
## 3 -0.007730159 0.02725858 ENSMUSG00000000037
## 4 0.084466667 0.01561692 ENSMUSG00000000049
## 5 0.127208145 0.10296240 ENSMUSG00000000056
## 6 -0.051222222 0.02596463 ENSMUSG00000000058
tail(liver.mouse.eQTL.min)
## gene snps statistic liver_pvalue FDR
## 10962 1434694_at rs13459062 -2.3077250 0.02861902 0.1563120
## 10963 1437645_at rs4231033 2.0047987 0.05474021 0.2465394
## 10964 1449939_s_at rs13481636 0.6744392 0.50556296 0.8021978
## 10965 1422547_at rs4165065 -1.6676237 0.10653819 0.3695183
## 10966 1451476_at rs4165065 0.3972613 0.69418700 0.8956391
## 10967 1448115_at rs4165069 1.5491597 0.13257346 0.4209997
## liver.beta liver.beta_se ensembl_id
## 10962 -0.12408929 0.05377126 ENSMUSG00000099041
## 10963 0.06022624 0.03004104 ENSMUSG00000099083
## 10964 0.01886111 0.02796562 ENSMUSG00000099116
## 10965 -0.07874163 0.04721786 ENSMUSG00000099164
## 10966 0.01569378 0.03950493 ENSMUSG00000099262
## 10967 0.08752778 0.05650017 ENSMUSG00000099305
dim(lung.mouse.eQTL.min)
## [1] 11015 8
dim(liver.mouse.eQTL.min)
## [1] 10967 8
# lung, liver eqtl with ensemble_id
merged.mouse.eQTL.min<-merge(lung.mouse.eQTL.min, liver.mouse.eQTL.min, by.x = "ensembl_id", by.y="ensembl_id")
head(merged.mouse.eQTL.min)
## ensembl_id gene.x snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001 1428645_at rs3702359 -1.4324741 0.1592383
## 2 ENSMUSG00000000037 1421514_a_at gnfX.141.820 -0.1728806 0.8635565
## 3 ENSMUSG00000000049 1416677_at rs3720981 -0.1308388 0.8965131
## 4 ENSMUSG00000000056 1451677_at CEL-11_120628029 0.4574870 0.6496243
## 5 ENSMUSG00000000058 1425955_at rs13478643 -1.0185410 0.3141176
## 6 ENSMUSG00000000085 1426241_a_at rs4224744 0.9192838 0.3630755
## FDR.x lung.beta lung.beta_se gene.y snps.y
## 1 0.3935788 -0.059800000 0.04174596 1428645_at rs13477321
## 2 0.9457289 -0.005911765 0.03419566 1421514_a_at CEL-X_154048891
## 3 0.9603432 -0.023660714 0.18083869 1416677_at rs3670642
## 4 0.8393760 0.016446429 0.03594950 1451678_at CEL-11_121219118
## 5 0.5866782 -0.078136752 0.07671439 1425955_at NAT_6_18.199327
## 6 0.6327046 0.052847059 0.05748721 1426241_a_at CEL-4_120039566
## statistic.y liver_pvalue FDR.y liver.beta liver.beta_se
## 1 -0.6238563 5.377718e-01 0.8222409897 -0.031972222 0.05124934
## 2 -0.2835863 7.788138e-01 0.9261630010 -0.007730159 0.02725858
## 3 5.4086629 9.084753e-06 0.0001847233 0.084466667 0.01561692
## 4 1.2354816 2.269175e-01 0.5622059072 0.127208145 0.10296240
## 5 -1.9727688 5.846922e-02 0.2581607284 -0.051222222 0.02596463
## 6 -2.2103759 3.542074e-02 0.1820043924 -0.072100000 0.03261889
dim(merged.mouse.eQTL.min)
## [1] 10422 15
merged.mouse.eQTL.min<-data.frame(merged.mouse.eQTL.min)
merged.mouse.eQTL.min<-merged.mouse.eQTL.min[, c(1, 5, 7, 8, 12, 14, 15 )]
head(merged.mouse.eQTL.min)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001 0.1592383 -0.059800000 0.04174596 5.377718e-01
## 2 ENSMUSG00000000037 0.8635565 -0.005911765 0.03419566 7.788138e-01
## 3 ENSMUSG00000000049 0.8965131 -0.023660714 0.18083869 9.084753e-06
## 4 ENSMUSG00000000056 0.6496243 0.016446429 0.03594950 2.269175e-01
## 5 ENSMUSG00000000058 0.3141176 -0.078136752 0.07671439 5.846922e-02
## 6 ENSMUSG00000000085 0.3630755 0.052847059 0.05748721 3.542074e-02
## liver.beta liver.beta_se
## 1 -0.031972222 0.05124934
## 2 -0.007730159 0.02725858
## 3 0.084466667 0.01561692
## 4 0.127208145 0.10296240
## 5 -0.051222222 0.02596463
## 6 -0.072100000 0.03261889
# check the relationships between teh variables of interest
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
# plot lung_pvalue and liver_pvalue
ggplot(merged.mouse.eQTL.min, aes(x=-log10(lung_pvalue), y=-log10(liver_pvalue))) +geom_point(shape=1) +geom_smooth(method=lm)

summary(lm(-log10(lung_pvalue) ~ -log10(liver_pvalue), data = merged.mouse.eQTL.min))
##
## Call:
## lm(formula = -log10(lung_pvalue) ~ -log10(liver_pvalue), data = merged.mouse.eQTL.min)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.112 -1.736 -1.295 -0.183 44.515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.11221 0.03616 58.42 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.691 on 10421 degrees of freedom
ggplot(merged.mouse.eQTL.min, aes(x=lung_pvalue, y=liver_pvalue)) +geom_point(shape=1) +geom_smooth(method=lm)

summary(lm(lung_pvalue ~ liver_pvalue, data = merged.mouse.eQTL.min))
##
## Call:
## lm(formula = lung_pvalue ~ liver_pvalue, data = merged.mouse.eQTL.min)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4149 -0.1803 -0.0961 0.1566 0.8109
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.178070 0.003671 48.51 <2e-16 ***
## liver_pvalue 0.237474 0.008952 26.53 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2593 on 10420 degrees of freedom
## Multiple R-squared: 0.06326, Adjusted R-squared: 0.06317
## F-statistic: 703.7 on 1 and 10420 DF, p-value: < 2.2e-16
# check the relationship between liver and lung after sebsetting the sample:p_value < 0.01
merged.mouse.eQTL.min.1<- subset(merged.mouse.eQTL.min, lung_pvalue < 0.01)
merged.mouse.eQTL.min.1<- subset(merged.mouse.eQTL.min.1,liver_pvalue < 0.01)
head(merged.mouse.eQTL.min.1)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 7 ENSMUSG00000000088 1.444641e-03 0.1899762 0.05579620 1.987922e-05
## 24 ENSMUSG00000000184 3.879409e-13 -0.4867727 0.04742066 1.668832e-03
## 49 ENSMUSG00000000326 4.714490e-12 0.7639259 0.08086460 1.795190e-04
## 96 ENSMUSG00000000826 7.947931e-23 -0.5532700 0.02862322 4.183130e-08
## 97 ENSMUSG00000000827 5.523964e-11 0.1906300 0.02199819 6.134551e-07
## 113 ENSMUSG00000000959 3.836814e-06 0.1476333 0.02788356 1.453444e-03
## liver.beta liver.beta_se
## 7 0.1924067 0.03756628
## 24 -0.1072000 0.03082153
## 49 0.5906667 0.13687857
## 96 -0.5487738 0.07374116
## 97 0.2724570 0.04251236
## 113 -0.1612584 0.04566665
ggplot(merged.mouse.eQTL.min.1, aes(x=-log10(lung_pvalue), y=-log10(liver_pvalue))) +geom_point(shape=1) +geom_smooth(method=lm)

write.table(merged.mouse.eQTL.min,file="2016-02-22 merged.mouse.eQTL.min.txt", sep="\t", quote=FALSE)
library(data.table)
merged.mouse.eQTL.min <- as.data.table(merged.mouse.eQTL.min)
# add lung_pvalue and liver_pvalue ranking to the table
merged.mouse.eQTL.min.1<-transform(merged.mouse.eQTL.min,
lung_pvalue.rank = ave(lung_pvalue,
FUN = function(x) rank(-x, ties.method = "first")))
merged.mouse.eQTL.min.ranked<-transform(merged.mouse.eQTL.min.1,
liver_pvalue.rank = ave(liver_pvalue,
FUN = function(x) rank(-x, ties.method = "first")))
head(merged.mouse.eQTL.min.ranked)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1: ENSMUSG00000000001 0.1592383 -0.059800000 0.04174596 5.377718e-01
## 2: ENSMUSG00000000037 0.8635565 -0.005911765 0.03419566 7.788138e-01
## 3: ENSMUSG00000000049 0.8965131 -0.023660714 0.18083869 9.084753e-06
## 4: ENSMUSG00000000056 0.6496243 0.016446429 0.03594950 2.269175e-01
## 5: ENSMUSG00000000058 0.3141176 -0.078136752 0.07671439 5.846922e-02
## 6: ENSMUSG00000000085 0.3630755 0.052847059 0.05748721 3.542074e-02
## liver.beta liver.beta_se lung_pvalue.rank liver_pvalue.rank
## 1: -0.031972222 0.05124934 5105 2359
## 2: -0.007730159 0.02725858 321 852
## 3: 0.084466667 0.01561692 233 9751
## 4: 0.127208145 0.10296240 1186 5056
## 5: -0.051222222 0.02596463 3468 7356
## 6: -0.072100000 0.03261889 3035 7822
ggplot(merged.mouse.eQTL.min.ranked, aes(x=lung_pvalue.rank, y=liver_pvalue.rank)) +geom_point(shape=1) +geom_smooth(method=lm)

merged.mouse.eQTL.min.ranked <- merged.mouse.eQTL.min.ranked[order(lung_pvalue.rank),]
head(merged.mouse.eQTL.min.ranked)
## ensembl_id lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1: ENSMUSG00000033004 1.0000000 0.000000e+00 NA 0.88726668
## 2: ENSMUSG00000021133 0.9996627 -1.190476e-05 0.02799500 0.62203679
## 3: ENSMUSG00000031104 0.9994782 3.333333e-05 0.05067690 0.12434129
## 4: ENSMUSG00000020010 0.9994212 -5.000000e-05 0.06852670 0.04484664
## 5: ENSMUSG00000028651 0.9989731 1.250000e-04 0.09655988 0.65136315
## 6: ENSMUSG00000031636 0.9985484 1.897233e-04 0.10367905 0.67556269
## liver.beta liver.beta_se lung_pvalue.rank liver_pvalue.rank
## 1: 0.005901786 0.04125367 1 373
## 2: 0.046401786 0.09308593 2 1775
## 3: -0.030111111 0.01900514 3 6280
## 4: -0.153918552 0.07328845 4 7632
## 5: -0.017507937 0.03833022 5 1598
## 6: -0.010392857 0.02457232 6 1434
# Chisq test with different threshold of Pvalue
Pvalue<-c(0.4, 0.2, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001, 0.00000005)
chisq.results <- matrix(0, nrow=length(Pvalue), ncol=5)
colnames(chisq.results)<-c("Pvalue_threshold","Pvalue_chisq.test","actual_shared", "expected_shared", "Foldchange" )
# Populate the said matrix
for (i in 1:length(Pvalue)) {
chisq.results[i,1] <- Pvalue[i]
chisq.results[i,2] <- chisq.test(table(merged.mouse.eQTL.min.ranked$lung_pvalue<Pvalue[i], merged.mouse.eQTL.min.ranked$liver_pvalue<Pvalue[i]),correct=T)$p.value
chisq.results[i,3] <- table(merged.mouse.eQTL.min.ranked$lung_pvalue<Pvalue[i], merged.mouse.eQTL.min.ranked$liver_pvalue<Pvalue[i])[2,2]
chisq.results[i,4] <- chisq.test(table(merged.mouse.eQTL.min.ranked$lung_pvalue<Pvalue[i], merged.mouse.eQTL.min.ranked$liver_pvalue<Pvalue[i]),correct=T)$expected[2,2]
chisq.results[i,5] <- chisq.results[i,3]/chisq.results[i,4]
}
print(chisq.results)
## Pvalue_threshold Pvalue_chisq.test actual_shared expected_shared
## [1,] 4e-01 8.020799e-63 5471 5115.48647
## [2,] 2e-01 2.130873e-103 3382 2834.52044
## [3,] 1e-01 5.922635e-162 2306 1645.29380
## [4,] 5e-02 3.123697e-229 1727 1020.35233
## [5,] 1e-02 1.155204e-311 1065 438.80445
## [6,] 1e-03 0.000000e+00 727 213.59969
## [7,] 1e-04 0.000000e+00 527 121.87795
## [8,] 1e-05 0.000000e+00 386 73.83708
## [9,] 1e-06 1.893351e-303 288 47.94387
## [10,] 1e-07 1.702579e-267 216 31.31203
## [11,] 5e-08 2.131666e-250 193 26.84226
## Foldchange
## [1,] 1.069498
## [2,] 1.193147
## [3,] 1.401573
## [4,] 1.692553
## [5,] 2.427049
## [6,] 3.403563
## [7,] 4.323998
## [8,] 5.227726
## [9,] 6.007025
## [10,] 6.898307
## [11,] 7.190155
chisq.results.df<-as.data.frame(chisq.results)
ggplot(chisq.results.df, aes(x=-log(Pvalue_threshold), y=Foldchange)) +geom_point()+geom_smooth(method=lm)

# retrieve gene expression and SNP for eqtl with minimum P value
BXD.geno.SNP.eqtl.for.liver<-read.table(file="2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt", header=T, sep="")
head(BXD.geno.SNP.eqtl.for.liver)
## Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442 0 0 1 0 0 1 1 0 1 0
## 2 rs6365999 0 0 1 0 0 1 1 0 1 0
## 3 rs6376963 0 0 1 0 0 1 1 0 1 0
## 4 rs3677817 0 0 1 0 0 1 1 0 1 0
## 5 rs8236463 0 1 1 0 0 1 1 0 1 0
## 6 rs6333200 0 1 1 0 0 1 1 0 1 0
## BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1 1 1 0 0 0 1 1 0 0 0 0 0
## 2 1 1 0 0 0 1 1 0 0 0 0 0
## 3 1 1 0 0 0 1 1 0 0 0 0 0
## 4 1 1 0 0 0 1 1 0 0 0 0 0
## 5 1 1 0 0 0 1 1 0 0 0 0 0
## 6 1 1 0 0 0 1 1 0 0 0 0 0
## BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1 0 1 0 0 1 1 1 0
## 2 0 1 0 0 1 1 1 0
## 3 0 1 0 0 1 1 1 0
## 4 0 1 0 0 1 1 1 0
## 5 0 1 0 0 1 1 1 0
## 6 0 1 0 0 1 1 1 0
mouse.liver.expression <- read.table(file="2015-12-07 mouse.liver.expression.eqtl.txt", header=T,sep="");
head(mouse.liver.expression)
## ProbeSet BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18
## 1 1415670_at 10.090 10.200 10.300 10.208 9.830 10.238 9.914 10.348
## 2 1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3 1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4 1415673_at 7.535 7.382 7.566 7.162 7.403 7.342 7.213 7.581
## 5 1415674_a_at 9.757 9.972 9.269 9.873 9.354 9.918 9.459 9.655
## 6 1415675_at 9.029 9.009 9.245 9.282 9.415 9.098 9.060 8.937
## BXD19 BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31
## 1 9.939 9.871 10.077 10.159 9.746 9.890 10.286 10.177 9.959 9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4 7.551 7.368 7.251 7.373 7.408 7.473 7.238 7.424 7.475 7.491
## 5 9.544 9.557 9.460 9.322 9.795 9.758 9.628 9.263 9.671 9.645
## 6 8.995 9.104 9.123 9.086 9.087 9.040 8.844 9.173 8.951 9.131
## BXD32 BXD33 BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6
## 1 10.059 10.102 10.174 10.022 10.364 9.745 10.074 9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4 7.419 7.386 7.496 7.369 7.265 7.130 7.149 7.383 7.439 7.233
## 5 9.368 9.741 9.721 9.684 9.387 9.649 9.693 9.793 9.840 9.519
## 6 9.108 9.122 9.153 9.008 9.138 9.126 9.233 9.060 9.134 9.081
## BXD8 BXD9
## 1 9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4 7.397 7.390
## 5 9.340 9.160
## 6 8.926 9.274
library(tidyr)
liver.mouse.eQTL.min <- liver.mouse.eQTL.min[liver.mouse.eQTL.min$ensembl_id %in% (merged.mouse.eQTL.min$ensembl_id), ]
dim(liver.mouse.eQTL.min)
## [1] 10422 8
expression_long <- gather(mouse.liver.expression, strain, expression, BXD1:BXD9)
expression_long <- expression_long[order(expression_long$ProbeSet, expression_long$strain), ]
rownames(expression_long) <- NULL
head(expression_long)
## ProbeSet strain expression
## 1 1415670_at BXD1 10.090
## 2 1415670_at BXD11 10.200
## 3 1415670_at BXD12 10.300
## 4 1415670_at BXD13 10.208
## 5 1415670_at BXD14 9.830
## 6 1415670_at BXD15 10.238
snp_long <- gather(BXD.geno.SNP.eqtl.for.liver, strain, snp, BXD1:BXD9)
snp_long <- snp_long[order(snp_long$Locus, snp_long$strain), ]
rownames(snp_long) <- NULL
head(snp_long)
## Locus strain snp
## 1 110Mit194 BXD1 0
## 2 110Mit194 BXD11 0
## 3 110Mit194 BXD12 0
## 4 110Mit194 BXD13 0
## 5 110Mit194 BXD14 1
## 6 110Mit194 BXD15 0
# Caculate the resdidual variance in eqtl analysis
liver.mv <- matrix(0, nrow=length(liver.mouse.eQTL.min$gene), ncol=5)
colnames(liver.mv)<-c("Probeset"," ensembl_id","SNP","beta", "model_variance" )
for (i in 1:length(liver.mouse.eQTL.min$gene)) {
liver.mv[i,1] <- as.character(liver.mouse.eQTL.min$gene[i])
liver.mv[i,2] <- as.character(liver.mouse.eQTL.min$ensembl_id[i])
liver.mv[i,3] <- as.character(liver.mouse.eQTL.min$snps[i])
liver.mv[i,4] <- summary(lm(expression ~ snp,
data=assign(paste("merged",i,sep=""), merge(
assign(paste("expression",i,sep=""), expression_long[(expression_long$ProbeSet %in% liver.mouse.eQTL.min$gene[i]), ]),
assign(paste("SNP",i,sep=""), snp_long[(snp_long$Locus %in% liver.mouse.eQTL.min$snps[i]), ]), by= "strain"))
))$coefficients[2,1]
liver.mv[i,5] <- summary(lm(expression ~ snp,
data=assign(paste("merged",i,sep=""), merge(
assign(paste("expression",i,sep=""), expression_long[(expression_long$ProbeSet %in% liver.mouse.eQTL.min$gene[i]), ]),
assign(paste("SNP",i,sep=""), snp_long[(snp_long$Locus %in% liver.mouse.eQTL.min$snps[i]), ]), by= "strain"))
))$sigma**2
}
write.table(liver.mv,file="2016-02-22-liver.mv.txt", sep="\t", row.names=FALSE, quote=FALSE)
liver.mv<-read.table(file="2016-02-22-liver.mv.txt", header=T, sep="", stringsAsFactors=FALSE)
colnames(liver.mv)<-c("Probeset"," ensembl_id", "SNP","beta", "liver_residual_variance")
# head(liver.mv)
# tail(liver.mv)
# head(merged.mouse.eQTL.min)
# tail(merged.mouse.eQTL.min)
merged.mouse.eQTL.min.variance<-cbind(merged.mouse.eQTL.min, liver.mv$liver_residual_variance)
names(merged.mouse.eQTL.min.variance)[8]<-"liver_residual_variance"
merged.mouse.eQTL.min.variance<-as.data.frame(merged.mouse.eQTL.min.variance)
merged.mouse.eQTL.min.variance2<-merged.mouse.eQTL.min.variance[, c(1,6:8,2:5)]
head(merged.mouse.eQTL.min.variance2)
## ensembl_id liver.beta liver.beta_se liver_residual_variance
## 1 ENSMUSG00000000001 -0.031972222 0.05124934 0.018910763
## 2 ENSMUSG00000000037 -0.007730159 0.02725858 0.004681090
## 3 ENSMUSG00000000049 0.084466667 0.01561692 0.001829162
## 4 ENSMUSG00000000056 0.127208145 0.10296240 0.078095912
## 5 ENSMUSG00000000058 -0.051222222 0.02596463 0.004853968
## 6 ENSMUSG00000000085 -0.072100000 0.03261889 0.007093279
## lung_pvalue lung.beta lung.beta_se liver_pvalue
## 1 0.1592383 -0.059800000 0.04174596 5.377718e-01
## 2 0.8635565 -0.005911765 0.03419566 7.788138e-01
## 3 0.8965131 -0.023660714 0.18083869 9.084753e-06
## 4 0.6496243 0.016446429 0.03594950 2.269175e-01
## 5 0.3141176 -0.078136752 0.07671439 5.846922e-02
## 6 0.3630755 0.052847059 0.05748721 3.542074e-02
write.table(merged.mouse.eQTL.min.variance2,file="2016-02-22-merged.mouse.eQTL.min.variance.txt", sep="\t", row.names=FALSE, quote=FALSE)
mouse.liver.expression <- read.table(file="2015-12-07 mouse.liver.expression.eqtl.txt", header=T)
head(mouse.liver.expression)
## ProbeSet BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18
## 1 1415670_at 10.090 10.200 10.300 10.208 9.830 10.238 9.914 10.348
## 2 1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3 1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4 1415673_at 7.535 7.382 7.566 7.162 7.403 7.342 7.213 7.581
## 5 1415674_a_at 9.757 9.972 9.269 9.873 9.354 9.918 9.459 9.655
## 6 1415675_at 9.029 9.009 9.245 9.282 9.415 9.098 9.060 8.937
## BXD19 BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31
## 1 9.939 9.871 10.077 10.159 9.746 9.890 10.286 10.177 9.959 9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4 7.551 7.368 7.251 7.373 7.408 7.473 7.238 7.424 7.475 7.491
## 5 9.544 9.557 9.460 9.322 9.795 9.758 9.628 9.263 9.671 9.645
## 6 8.995 9.104 9.123 9.086 9.087 9.040 8.844 9.173 8.951 9.131
## BXD32 BXD33 BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6
## 1 10.059 10.102 10.174 10.022 10.364 9.745 10.074 9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4 7.419 7.386 7.496 7.369 7.265 7.130 7.149 7.383 7.439 7.233
## 5 9.368 9.741 9.721 9.684 9.387 9.649 9.693 9.793 9.840 9.519
## 6 9.108 9.122 9.153 9.008 9.138 9.126 9.233 9.060 9.134 9.081
## BXD8 BXD9
## 1 9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4 7.397 7.390
## 5 9.340 9.160
## 6 8.926 9.274
mouse.liver.expression.min <- mouse.liver.expression[mouse.liver.expression$ProbeSet %in% liver.mouse.eQTL.min$gene,]
mouse.liver.expression.min <- merge(mouse.liver.expression.min, liver.mouse.eQTL.min[, c(1:2, 8)], by.x = "ProbeSet", by.y = "gene" )
mouse.liver.expression.min <- mouse.liver.expression.min[order(mouse.liver.expression.min$ensembl_id),]
mouse.liver.expression.min$ensembl_id <- NULL
mouse.liver.expression.min$snps <- NULL
head(mouse.liver.expression.min)
## ProbeSet BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18
## 6168 1428645_at 10.890 10.897 10.871 11.098 11.036 10.888 10.939 11.083
## 2852 1421514_a_at 6.437 6.281 6.259 6.405 6.352 6.406 6.361 6.309
## 514 1416677_at 14.754 14.599 14.656 14.724 14.700 14.626 14.700 14.695
## 9246 1451678_at 9.204 9.297 9.205 9.469 9.447 9.178 9.169 9.619
## 5079 1425955_at 6.568 6.601 6.496 6.694 6.577 6.619 6.492 6.693
## 5196 1426241_a_at 7.037 7.110 7.018 6.778 7.086 7.032 7.018 6.929
## BXD19 BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31
## 6168 10.937 10.770 10.602 10.904 11.075 11.154 11.012 10.978 10.877 10.842
## 2852 6.390 6.433 6.329 6.433 6.295 6.385 6.316 6.366 6.419 6.360
## 514 14.724 14.745 14.693 14.764 14.757 14.695 14.553 14.615 14.665 14.619
## 9246 8.924 9.215 8.641 8.715 9.215 9.344 8.984 9.057 8.817 9.401
## 5079 6.714 6.724 6.790 6.655 6.539 6.500 6.561 6.604 6.715 6.706
## 5196 6.905 6.909 7.203 7.050 7.075 7.022 7.096 6.995 7.042 7.116
## BXD32 BXD33 BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6
## 6168 11.098 10.953 11.136 10.898 11.067 10.897 10.875 10.644 10.858 11.052
## 2852 6.277 6.230 6.382 6.304 6.237 6.295 6.438 6.471 6.362 6.371
## 514 14.775 14.626 14.634 14.680 14.640 14.620 14.651 14.779 14.696 14.741
## 9246 9.149 8.968 9.031 9.611 9.156 8.923 8.988 9.380 9.788 8.781
## 5079 6.671 6.613 6.679 6.599 6.582 6.561 6.651 6.594 6.627 6.625
## 5196 7.039 7.109 7.049 7.046 7.022 7.095 6.999 7.142 6.899 7.007
## BXD8 BXD9
## 6168 10.820 11.122
## 2852 6.475 6.391
## 514 14.774 14.715
## 9246 8.905 9.510
## 5079 6.600 6.678
## 5196 7.061 6.853
dim(mouse.liver.expression.min)
## [1] 10422 31
mouse.SNP <- read.table(file="2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt", header=T)
head(mouse.SNP)
## Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442 0 0 1 0 0 1 1 0 1 0
## 2 rs6365999 0 0 1 0 0 1 1 0 1 0
## 3 rs6376963 0 0 1 0 0 1 1 0 1 0
## 4 rs3677817 0 0 1 0 0 1 1 0 1 0
## 5 rs8236463 0 1 1 0 0 1 1 0 1 0
## 6 rs6333200 0 1 1 0 0 1 1 0 1 0
## BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1 1 1 0 0 0 1 1 0 0 0 0 0
## 2 1 1 0 0 0 1 1 0 0 0 0 0
## 3 1 1 0 0 0 1 1 0 0 0 0 0
## 4 1 1 0 0 0 1 1 0 0 0 0 0
## 5 1 1 0 0 0 1 1 0 0 0 0 0
## 6 1 1 0 0 0 1 1 0 0 0 0 0
## BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1 0 1 0 0 1 1 1 0
## 2 0 1 0 0 1 1 1 0
## 3 0 1 0 0 1 1 1 0
## 4 0 1 0 0 1 1 1 0
## 5 0 1 0 0 1 1 1 0
## 6 0 1 0 0 1 1 1 0
dim(mouse.SNP)
## [1] 3811 31
mouse.SNP.min <- mouse.SNP[mouse.SNP$Locus %in% liver.mouse.eQTL.min$snps, ]
mouse.SNP.min <- merge(liver.mouse.eQTL.min[, c(1:2,8)],mouse.SNP.min, by.x = "snps", by.y = "Locus" )
mouse.SNP.min <- mouse.SNP.min[order(mouse.SNP.min$ensembl_id),]
mouse.SNP.min$gene <- NULL
mouse.SNP.min$ensembl_id <- NULL
head(mouse.SNP.min)
## snps BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19
## 3096 rs13477321 0 1 1 0 0 1 0 0 0
## 920 CEL-X_154048891 0 0 0 1 0 1 0 0 1
## 6703 rs3670642 1 0 0 0 0 0 0 1 1
## 455 CEL-11_121219118 1 0 1 1 0 1 1 0 0
## 2102 NAT_6_18.199327 0 1 1 1 1 0 1 1 0
## 655 CEL-4_120039566 1 0 1 1 1 1 1 1 1
## BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33
## 3096 1 0 1 0 0 0 1 0 0 0 1
## 920 0 0 0 1 1 1 0 0 0 1 0
## 6703 1 1 1 1 1 0 0 0 0 1 0
## 455 1 0 1 1 1 1 0 0 0 1 0
## 2102 0 0 0 1 1 1 1 0 0 1 1
## 655 0 0 1 0 0 1 1 1 0 1 1
## BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 3096 1 1 1 1 0 0 0 0 1 0
## 920 0 0 1 0 0 1 0 0 0 0
## 6703 1 0 0 0 0 1 1 1 1 1
## 455 0 1 0 0 0 1 1 1 1 1
## 2102 1 0 0 0 1 1 0 1 1 1
## 655 0 1 1 0 1 0 1 0 1 1
dim(mouse.SNP.min)
## [1] 10422 31
write.table(mouse.liver.expression.min,file="2016-04-21-mouse.liver.expression.min.txt", sep="\t", row.names=FALSE, quote=FALSE)
write.table(mouse.SNP.min,file="2016-04-21-mouse.SNP.min.txt", sep="\t", row.names=FALSE, quote=FALSE)