# Select Gene-SNP pair with minimum P value
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 366301 19.6     592000 31.7   460000 24.6
## Vcells 578185  4.5    1308461 10.0   786181  6.0
setwd("/Volumes/Transcend/Thesis_project/3.merge.liver.lung.cis.eqtl")
# load mouse lung cis eqtl result
lung.mouse.eQTL<-read.table(file="2015-12-04 mouselung.cis.1M.eqtls.txt",  header=T)
# load mouse liver cis eqtl result
liver.mouse.eQTL<-read.table(file="2015-12-07 mouseliver.cis.1M.eqtls.txt",  header=T)
mouse4302ensembl_id<-read.table(file="2015-12-04 mouse4302ensembl_id.txt",  header=T)
mouse430aensembl_id<-read.table(file="2015-12-07 mouse430aensembl_id.txt",  header=T)
# Add ensemble id annoatation to the data 
lung.mouse.eQTL<-merge(lung.mouse.eQTL, mouse4302ensembl_id, by.x = "gene", by.y="probe_id")
liver.mouse.eQTL<-merge(liver.mouse.eQTL, mouse430aensembl_id, by.x = "gene", by.y="probe_id")
head(lung.mouse.eQTL)
##         gene          snps  statistic    pvalue       FDR        beta
## 1 1415670_at    rs13475374  1.0503967 0.2994035 0.5712395  0.06286667
## 2 1415670_at    rs13478876  0.9133601 0.3661462 0.6356597  0.05583410
## 3 1415670_at     rs3713705  1.1363521 0.2621028 0.5310916  0.06786667
## 4 1415670_at    rs13478880  1.1363521 0.2621028 0.5310916  0.06786667
## 5 1415670_at gnf06.086.089  0.9133601 0.3661462 0.6356597  0.05583410
## 6 1415672_at    rs13479651 -1.3052534 0.1987480 0.4510718 -0.03993582
##      beta_se         ensembl_id
## 1 0.05985040 ENSMUSG00000030058
## 2 0.06113044 ENSMUSG00000030058
## 3 0.05972327 ENSMUSG00000030058
## 4 0.05972327 ENSMUSG00000030058
## 5 0.06113044 ENSMUSG00000030058
## 6 0.03059622 ENSMUSG00000015341
head(liver.mouse.eQTL)
##         gene          snps statistic    pvalue       FDR        beta
## 1 1415670_at    rs13478876 0.3577236 0.7232327 0.9060716 0.022464286
## 2 1415670_at    rs13475374 0.1359233 0.8928544 0.9654697 0.008533333
## 3 1415670_at gnf06.086.089 0.3577236 0.7232327 0.9060716 0.022464286
## 4 1415670_at    rs13478880 0.3577236 0.7232327 0.9060716 0.022464286
## 5 1415670_at     rs3713705 0.3577236 0.7232327 0.9060716 0.022464286
## 6 1415671_at     rs3675894 1.0520305 0.3017805 0.6480655 0.038909091
##      beta_se         ensembl_id
## 1 0.06279788 ENSMUSG00000030058
## 2 0.06278048 ENSMUSG00000030058
## 3 0.06279788 ENSMUSG00000030058
## 4 0.06279788 ENSMUSG00000030058
## 5 0.06279788 ENSMUSG00000030058
## 6 0.03698476 ENSMUSG00000013160
library(data.table)
# Select Gene-SNP pair with minimum P value
lung.mouse.eQTL.min <- data.table(lung.mouse.eQTL, key=c('ensembl_id', "pvalue"))
lung.mouse.eQTL.min<-lung.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
lung.mouse.eQTL.min<-as.data.frame(lung.mouse.eQTL.min)

liver.mouse.eQTL.min <- data.table(liver.mouse.eQTL, key=c('ensembl_id', "pvalue"))
liver.mouse.eQTL.min<-liver.mouse.eQTL.min[J(unique(ensembl_id)),mult="first"]
liver.mouse.eQTL.min<-as.data.frame(liver.mouse.eQTL.min)

library(plyr)
lung.mouse.eQTL.min<-rename(lung.mouse.eQTL.min, c("pvalue"="lung_pvalue", "beta"="lung.beta", "beta_se"="lung.beta_se"))
liver.mouse.eQTL.min<-rename(liver.mouse.eQTL.min, c("pvalue"="liver_pvalue", "beta"="liver.beta", "beta_se"="liver.beta_se"))

head(lung.mouse.eQTL.min)
##           gene             snps  statistic lung_pvalue       FDR
## 1   1428645_at        rs3702359 -1.4324741   0.1592383 0.3935788
## 2 1421514_a_at     gnfX.141.820 -0.1728806   0.8635565 0.9457289
## 3   1416677_at        rs3720981 -0.1308388   0.8965131 0.9603432
## 4   1451677_at CEL-11_120628029  0.4574870   0.6496243 0.8393760
## 5   1425955_at       rs13478643 -1.0185410   0.3141176 0.5866782
## 6 1426241_a_at        rs4224744  0.9192838   0.3630755 0.6327046
##      lung.beta lung.beta_se         ensembl_id
## 1 -0.059800000   0.04174596 ENSMUSG00000000001
## 2 -0.005911765   0.03419566 ENSMUSG00000000037
## 3 -0.023660714   0.18083869 ENSMUSG00000000049
## 4  0.016446429   0.03594950 ENSMUSG00000000056
## 5 -0.078136752   0.07671439 ENSMUSG00000000058
## 6  0.052847059   0.05748721 ENSMUSG00000000085
head(liver.mouse.eQTL.min)
##           gene             snps  statistic liver_pvalue          FDR
## 1   1428645_at       rs13477321 -0.6238563 5.377718e-01 0.8222409897
## 2   1449320_at   CEL-X_71438949  1.3340954 1.929225e-01 0.5174275252
## 3 1421514_a_at  CEL-X_154048891 -0.2835863 7.788138e-01 0.9261630010
## 4   1416677_at        rs3670642  5.4086629 9.084753e-06 0.0001847233
## 5   1451678_at CEL-11_121219118  1.2354816 2.269175e-01 0.5622059072
## 6   1425955_at  NAT_6_18.199327 -1.9727688 5.846922e-02 0.2581607284
##     liver.beta liver.beta_se         ensembl_id
## 1 -0.031972222    0.05124934 ENSMUSG00000000001
## 2  0.033761364    0.02530656 ENSMUSG00000000003
## 3 -0.007730159    0.02725858 ENSMUSG00000000037
## 4  0.084466667    0.01561692 ENSMUSG00000000049
## 5  0.127208145    0.10296240 ENSMUSG00000000056
## 6 -0.051222222    0.02596463 ENSMUSG00000000058
tail(liver.mouse.eQTL.min)
##               gene       snps  statistic liver_pvalue       FDR
## 10962   1434694_at rs13459062 -2.3077250   0.02861902 0.1563120
## 10963   1437645_at  rs4231033  2.0047987   0.05474021 0.2465394
## 10964 1449939_s_at rs13481636  0.6744392   0.50556296 0.8021978
## 10965   1422547_at  rs4165065 -1.6676237   0.10653819 0.3695183
## 10966   1451476_at  rs4165065  0.3972613   0.69418700 0.8956391
## 10967   1448115_at  rs4165069  1.5491597   0.13257346 0.4209997
##        liver.beta liver.beta_se         ensembl_id
## 10962 -0.12408929    0.05377126 ENSMUSG00000099041
## 10963  0.06022624    0.03004104 ENSMUSG00000099083
## 10964  0.01886111    0.02796562 ENSMUSG00000099116
## 10965 -0.07874163    0.04721786 ENSMUSG00000099164
## 10966  0.01569378    0.03950493 ENSMUSG00000099262
## 10967  0.08752778    0.05650017 ENSMUSG00000099305
dim(lung.mouse.eQTL.min)
## [1] 11015     8
dim(liver.mouse.eQTL.min)
## [1] 10967     8
# lung, liver eqtl with ensemble_id
merged.mouse.eQTL.min<-merge(lung.mouse.eQTL.min, liver.mouse.eQTL.min, by.x = "ensembl_id", by.y="ensembl_id")
head(merged.mouse.eQTL.min)
##           ensembl_id       gene.x           snps.x statistic.x lung_pvalue
## 1 ENSMUSG00000000001   1428645_at        rs3702359  -1.4324741   0.1592383
## 2 ENSMUSG00000000037 1421514_a_at     gnfX.141.820  -0.1728806   0.8635565
## 3 ENSMUSG00000000049   1416677_at        rs3720981  -0.1308388   0.8965131
## 4 ENSMUSG00000000056   1451677_at CEL-11_120628029   0.4574870   0.6496243
## 5 ENSMUSG00000000058   1425955_at       rs13478643  -1.0185410   0.3141176
## 6 ENSMUSG00000000085 1426241_a_at        rs4224744   0.9192838   0.3630755
##       FDR.x    lung.beta lung.beta_se       gene.y           snps.y
## 1 0.3935788 -0.059800000   0.04174596   1428645_at       rs13477321
## 2 0.9457289 -0.005911765   0.03419566 1421514_a_at  CEL-X_154048891
## 3 0.9603432 -0.023660714   0.18083869   1416677_at        rs3670642
## 4 0.8393760  0.016446429   0.03594950   1451678_at CEL-11_121219118
## 5 0.5866782 -0.078136752   0.07671439   1425955_at  NAT_6_18.199327
## 6 0.6327046  0.052847059   0.05748721 1426241_a_at  CEL-4_120039566
##   statistic.y liver_pvalue        FDR.y   liver.beta liver.beta_se
## 1  -0.6238563 5.377718e-01 0.8222409897 -0.031972222    0.05124934
## 2  -0.2835863 7.788138e-01 0.9261630010 -0.007730159    0.02725858
## 3   5.4086629 9.084753e-06 0.0001847233  0.084466667    0.01561692
## 4   1.2354816 2.269175e-01 0.5622059072  0.127208145    0.10296240
## 5  -1.9727688 5.846922e-02 0.2581607284 -0.051222222    0.02596463
## 6  -2.2103759 3.542074e-02 0.1820043924 -0.072100000    0.03261889
dim(merged.mouse.eQTL.min)
## [1] 10422    15
merged.mouse.eQTL.min<-data.frame(merged.mouse.eQTL.min)
merged.mouse.eQTL.min<-merged.mouse.eQTL.min[, c(1, 5, 7, 8, 12, 14, 15 )]
head(merged.mouse.eQTL.min)
##           ensembl_id lung_pvalue    lung.beta lung.beta_se liver_pvalue
## 1 ENSMUSG00000000001   0.1592383 -0.059800000   0.04174596 5.377718e-01
## 2 ENSMUSG00000000037   0.8635565 -0.005911765   0.03419566 7.788138e-01
## 3 ENSMUSG00000000049   0.8965131 -0.023660714   0.18083869 9.084753e-06
## 4 ENSMUSG00000000056   0.6496243  0.016446429   0.03594950 2.269175e-01
## 5 ENSMUSG00000000058   0.3141176 -0.078136752   0.07671439 5.846922e-02
## 6 ENSMUSG00000000085   0.3630755  0.052847059   0.05748721 3.542074e-02
##     liver.beta liver.beta_se
## 1 -0.031972222    0.05124934
## 2 -0.007730159    0.02725858
## 3  0.084466667    0.01561692
## 4  0.127208145    0.10296240
## 5 -0.051222222    0.02596463
## 6 -0.072100000    0.03261889
# check the relationships between teh variables of interest
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
# plot lung_pvalue and liver_pvalue
ggplot(merged.mouse.eQTL.min, aes(x=-log10(lung_pvalue), y=-log10(liver_pvalue))) +geom_point(shape=1) +geom_smooth(method=lm)

summary(lm(-log10(lung_pvalue) ~ -log10(liver_pvalue), data = merged.mouse.eQTL.min))
## 
## Call:
## lm(formula = -log10(lung_pvalue) ~ -log10(liver_pvalue), data = merged.mouse.eQTL.min)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.112 -1.736 -1.295 -0.183 44.515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.11221    0.03616   58.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.691 on 10421 degrees of freedom
ggplot(merged.mouse.eQTL.min, aes(x=lung_pvalue, y=liver_pvalue)) +geom_point(shape=1) +geom_smooth(method=lm)

summary(lm(lung_pvalue ~ liver_pvalue, data = merged.mouse.eQTL.min))
## 
## Call:
## lm(formula = lung_pvalue ~ liver_pvalue, data = merged.mouse.eQTL.min)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4149 -0.1803 -0.0961  0.1566  0.8109 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.178070   0.003671   48.51   <2e-16 ***
## liver_pvalue 0.237474   0.008952   26.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2593 on 10420 degrees of freedom
## Multiple R-squared:  0.06326,    Adjusted R-squared:  0.06317 
## F-statistic: 703.7 on 1 and 10420 DF,  p-value: < 2.2e-16
# check the relationship between liver and lung after sebsetting the sample:p_value < 0.01
merged.mouse.eQTL.min.1<- subset(merged.mouse.eQTL.min, lung_pvalue < 0.01)
merged.mouse.eQTL.min.1<- subset(merged.mouse.eQTL.min.1,liver_pvalue < 0.01)
head(merged.mouse.eQTL.min.1)
##             ensembl_id  lung_pvalue  lung.beta lung.beta_se liver_pvalue
## 7   ENSMUSG00000000088 1.444641e-03  0.1899762   0.05579620 1.987922e-05
## 24  ENSMUSG00000000184 3.879409e-13 -0.4867727   0.04742066 1.668832e-03
## 49  ENSMUSG00000000326 4.714490e-12  0.7639259   0.08086460 1.795190e-04
## 96  ENSMUSG00000000826 7.947931e-23 -0.5532700   0.02862322 4.183130e-08
## 97  ENSMUSG00000000827 5.523964e-11  0.1906300   0.02199819 6.134551e-07
## 113 ENSMUSG00000000959 3.836814e-06  0.1476333   0.02788356 1.453444e-03
##     liver.beta liver.beta_se
## 7    0.1924067    0.03756628
## 24  -0.1072000    0.03082153
## 49   0.5906667    0.13687857
## 96  -0.5487738    0.07374116
## 97   0.2724570    0.04251236
## 113 -0.1612584    0.04566665
ggplot(merged.mouse.eQTL.min.1, aes(x=-log10(lung_pvalue), y=-log10(liver_pvalue))) +geom_point(shape=1) +geom_smooth(method=lm)

write.table(merged.mouse.eQTL.min,file="2016-02-22 merged.mouse.eQTL.min.txt", sep="\t", quote=FALSE)
library(data.table)
merged.mouse.eQTL.min <- as.data.table(merged.mouse.eQTL.min)
# add lung_pvalue and liver_pvalue ranking to the table 
merged.mouse.eQTL.min.1<-transform(merged.mouse.eQTL.min, 
                                lung_pvalue.rank = ave(lung_pvalue, 
                                                        FUN = function(x) rank(-x, ties.method = "first")))
merged.mouse.eQTL.min.ranked<-transform(merged.mouse.eQTL.min.1, 
                                   liver_pvalue.rank = ave(liver_pvalue, 
                                                          FUN = function(x) rank(-x, ties.method = "first")))

head(merged.mouse.eQTL.min.ranked)
##            ensembl_id lung_pvalue    lung.beta lung.beta_se liver_pvalue
## 1: ENSMUSG00000000001   0.1592383 -0.059800000   0.04174596 5.377718e-01
## 2: ENSMUSG00000000037   0.8635565 -0.005911765   0.03419566 7.788138e-01
## 3: ENSMUSG00000000049   0.8965131 -0.023660714   0.18083869 9.084753e-06
## 4: ENSMUSG00000000056   0.6496243  0.016446429   0.03594950 2.269175e-01
## 5: ENSMUSG00000000058   0.3141176 -0.078136752   0.07671439 5.846922e-02
## 6: ENSMUSG00000000085   0.3630755  0.052847059   0.05748721 3.542074e-02
##      liver.beta liver.beta_se lung_pvalue.rank liver_pvalue.rank
## 1: -0.031972222    0.05124934             5105              2359
## 2: -0.007730159    0.02725858              321               852
## 3:  0.084466667    0.01561692              233              9751
## 4:  0.127208145    0.10296240             1186              5056
## 5: -0.051222222    0.02596463             3468              7356
## 6: -0.072100000    0.03261889             3035              7822
ggplot(merged.mouse.eQTL.min.ranked, aes(x=lung_pvalue.rank, y=liver_pvalue.rank)) +geom_point(shape=1) +geom_smooth(method=lm)

merged.mouse.eQTL.min.ranked <- merged.mouse.eQTL.min.ranked[order(lung_pvalue.rank),] 
head(merged.mouse.eQTL.min.ranked)
##            ensembl_id lung_pvalue     lung.beta lung.beta_se liver_pvalue
## 1: ENSMUSG00000033004   1.0000000  0.000000e+00           NA   0.88726668
## 2: ENSMUSG00000021133   0.9996627 -1.190476e-05   0.02799500   0.62203679
## 3: ENSMUSG00000031104   0.9994782  3.333333e-05   0.05067690   0.12434129
## 4: ENSMUSG00000020010   0.9994212 -5.000000e-05   0.06852670   0.04484664
## 5: ENSMUSG00000028651   0.9989731  1.250000e-04   0.09655988   0.65136315
## 6: ENSMUSG00000031636   0.9985484  1.897233e-04   0.10367905   0.67556269
##      liver.beta liver.beta_se lung_pvalue.rank liver_pvalue.rank
## 1:  0.005901786    0.04125367                1               373
## 2:  0.046401786    0.09308593                2              1775
## 3: -0.030111111    0.01900514                3              6280
## 4: -0.153918552    0.07328845                4              7632
## 5: -0.017507937    0.03833022                5              1598
## 6: -0.010392857    0.02457232                6              1434
# Chisq test with different threshold of Pvalue
Pvalue<-c(0.4, 0.2, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001, 0.00000005)
chisq.results <- matrix(0, nrow=length(Pvalue), ncol=5)
colnames(chisq.results)<-c("Pvalue_threshold","Pvalue_chisq.test","actual_shared", "expected_shared", "Foldchange" )

# Populate the said matrix
for (i in 1:length(Pvalue)) {
  chisq.results[i,1] <- Pvalue[i]
  chisq.results[i,2] <- chisq.test(table(merged.mouse.eQTL.min.ranked$lung_pvalue<Pvalue[i], merged.mouse.eQTL.min.ranked$liver_pvalue<Pvalue[i]),correct=T)$p.value
  chisq.results[i,3] <- table(merged.mouse.eQTL.min.ranked$lung_pvalue<Pvalue[i], merged.mouse.eQTL.min.ranked$liver_pvalue<Pvalue[i])[2,2]
  chisq.results[i,4] <- chisq.test(table(merged.mouse.eQTL.min.ranked$lung_pvalue<Pvalue[i], merged.mouse.eQTL.min.ranked$liver_pvalue<Pvalue[i]),correct=T)$expected[2,2]
  chisq.results[i,5] <- chisq.results[i,3]/chisq.results[i,4]
}
print(chisq.results)
##       Pvalue_threshold Pvalue_chisq.test actual_shared expected_shared
##  [1,]            4e-01      8.020799e-63          5471      5115.48647
##  [2,]            2e-01     2.130873e-103          3382      2834.52044
##  [3,]            1e-01     5.922635e-162          2306      1645.29380
##  [4,]            5e-02     3.123697e-229          1727      1020.35233
##  [5,]            1e-02     1.155204e-311          1065       438.80445
##  [6,]            1e-03      0.000000e+00           727       213.59969
##  [7,]            1e-04      0.000000e+00           527       121.87795
##  [8,]            1e-05      0.000000e+00           386        73.83708
##  [9,]            1e-06     1.893351e-303           288        47.94387
## [10,]            1e-07     1.702579e-267           216        31.31203
## [11,]            5e-08     2.131666e-250           193        26.84226
##       Foldchange
##  [1,]   1.069498
##  [2,]   1.193147
##  [3,]   1.401573
##  [4,]   1.692553
##  [5,]   2.427049
##  [6,]   3.403563
##  [7,]   4.323998
##  [8,]   5.227726
##  [9,]   6.007025
## [10,]   6.898307
## [11,]   7.190155
chisq.results.df<-as.data.frame(chisq.results)
ggplot(chisq.results.df, aes(x=-log(Pvalue_threshold), y=Foldchange)) +geom_point()+geom_smooth(method=lm)

# retrieve gene expression and SNP for eqtl with minimum P value
BXD.geno.SNP.eqtl.for.liver<-read.table(file="2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt",  header=T, sep="")
head(BXD.geno.SNP.eqtl.for.liver)
##       Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442    0     0     1     0     0     1     1     0     1    0
## 2 rs6365999    0     0     1     0     0     1     1     0     1    0
## 3 rs6376963    0     0     1     0     0     1     1     0     1    0
## 4 rs3677817    0     0     1     0     0     1     1     0     1    0
## 5 rs8236463    0     1     1     0     0     1     1     0     1    0
## 6 rs6333200    0     1     1     0     0     1     1     0     1    0
##   BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1     1     1     0      0     0     1     1     0     0     0     0     0
## 2     1     1     0      0     0     1     1     0     0     0     0     0
## 3     1     1     0      0     0     1     1     0     0     0     0     0
## 4     1     1     0      0     0     1     1     0     0     0     0     0
## 5     1     1     0      0     0     1     1     0     0     0     0     0
## 6     1     1     0      0     0     1     1     0     0     0     0     0
##   BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1     0     1     0     0    1    1    1    0
## 2     0     1     0     0    1    1    1    0
## 3     0     1     0     0    1    1    1    0
## 4     0     1     0     0    1    1    1    0
## 5     0     1     0     0    1    1    1    0
## 6     0     1     0     0    1    1    1    0
mouse.liver.expression <- read.table(file="2015-12-07 mouse.liver.expression.eqtl.txt", header=T,sep="");
head(mouse.liver.expression)
##       ProbeSet   BXD1  BXD11  BXD12  BXD13  BXD14  BXD15  BXD16  BXD18
## 1   1415670_at 10.090 10.200 10.300 10.208  9.830 10.238  9.914 10.348
## 2   1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3   1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4   1415673_at  7.535  7.382  7.566  7.162  7.403  7.342  7.213  7.581
## 5 1415674_a_at  9.757  9.972  9.269  9.873  9.354  9.918  9.459  9.655
## 6   1415675_at  9.029  9.009  9.245  9.282  9.415  9.098  9.060  8.937
##    BXD19   BXD2  BXD20  BXD21  BXD24 BXD24a  BXD27  BXD28  BXD29  BXD31
## 1  9.939  9.871 10.077 10.159  9.746  9.890 10.286 10.177  9.959  9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4  7.551  7.368  7.251  7.373  7.408  7.473  7.238  7.424  7.475  7.491
## 5  9.544  9.557  9.460  9.322  9.795  9.758  9.628  9.263  9.671  9.645
## 6  8.995  9.104  9.123  9.086  9.087  9.040  8.844  9.173  8.951  9.131
##    BXD32  BXD33  BXD34  BXD36  BXD38  BXD39  BXD40  BXD42   BXD5   BXD6
## 1 10.059 10.102 10.174 10.022 10.364  9.745 10.074  9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4  7.419  7.386  7.496  7.369  7.265  7.130  7.149  7.383  7.439  7.233
## 5  9.368  9.741  9.721  9.684  9.387  9.649  9.693  9.793  9.840  9.519
## 6  9.108  9.122  9.153  9.008  9.138  9.126  9.233  9.060  9.134  9.081
##     BXD8   BXD9
## 1  9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4  7.397  7.390
## 5  9.340  9.160
## 6  8.926  9.274
library(tidyr)

liver.mouse.eQTL.min <- liver.mouse.eQTL.min[liver.mouse.eQTL.min$ensembl_id %in% (merged.mouse.eQTL.min$ensembl_id), ]
dim(liver.mouse.eQTL.min)
## [1] 10422     8
expression_long <- gather(mouse.liver.expression, strain, expression, BXD1:BXD9)
expression_long <- expression_long[order(expression_long$ProbeSet, expression_long$strain), ]
rownames(expression_long) <- NULL
head(expression_long)
##     ProbeSet strain expression
## 1 1415670_at   BXD1     10.090
## 2 1415670_at  BXD11     10.200
## 3 1415670_at  BXD12     10.300
## 4 1415670_at  BXD13     10.208
## 5 1415670_at  BXD14      9.830
## 6 1415670_at  BXD15     10.238
snp_long <- gather(BXD.geno.SNP.eqtl.for.liver, strain, snp, BXD1:BXD9)
snp_long <- snp_long[order(snp_long$Locus, snp_long$strain), ]
rownames(snp_long) <- NULL
head(snp_long)
##       Locus strain snp
## 1 110Mit194   BXD1   0
## 2 110Mit194  BXD11   0
## 3 110Mit194  BXD12   0
## 4 110Mit194  BXD13   0
## 5 110Mit194  BXD14   1
## 6 110Mit194  BXD15   0
# Caculate the resdidual variance in eqtl analysis
liver.mv <- matrix(0, nrow=length(liver.mouse.eQTL.min$gene), ncol=5)
colnames(liver.mv)<-c("Probeset"," ensembl_id","SNP","beta", "model_variance" )

for (i in 1:length(liver.mouse.eQTL.min$gene))  {
    liver.mv[i,1] <- as.character(liver.mouse.eQTL.min$gene[i])
    liver.mv[i,2] <- as.character(liver.mouse.eQTL.min$ensembl_id[i])
    liver.mv[i,3] <- as.character(liver.mouse.eQTL.min$snps[i])
    liver.mv[i,4] <- summary(lm(expression ~ snp, 
                                data=assign(paste("merged",i,sep=""), merge(
                                  assign(paste("expression",i,sep=""), expression_long[(expression_long$ProbeSet %in%  liver.mouse.eQTL.min$gene[i]), ]),
                                  assign(paste("SNP",i,sep=""), snp_long[(snp_long$Locus %in%  liver.mouse.eQTL.min$snps[i]), ]), by= "strain"))
    ))$coefficients[2,1]
    liver.mv[i,5] <- summary(lm(expression ~ snp, 
                                data=assign(paste("merged",i,sep=""), merge(
                                  assign(paste("expression",i,sep=""), expression_long[(expression_long$ProbeSet %in%  liver.mouse.eQTL.min$gene[i]), ]),
                                  assign(paste("SNP",i,sep=""), snp_long[(snp_long$Locus %in%  liver.mouse.eQTL.min$snps[i]), ]), by= "strain"))
    ))$sigma**2 
        
}

write.table(liver.mv,file="2016-02-22-liver.mv.txt", sep="\t", row.names=FALSE, quote=FALSE)

liver.mv<-read.table(file="2016-02-22-liver.mv.txt",  header=T, sep="", stringsAsFactors=FALSE)

colnames(liver.mv)<-c("Probeset"," ensembl_id", "SNP","beta", "liver_residual_variance")
# head(liver.mv)
# tail(liver.mv)
# head(merged.mouse.eQTL.min)
# tail(merged.mouse.eQTL.min)
merged.mouse.eQTL.min.variance<-cbind(merged.mouse.eQTL.min, liver.mv$liver_residual_variance)
names(merged.mouse.eQTL.min.variance)[8]<-"liver_residual_variance"
merged.mouse.eQTL.min.variance<-as.data.frame(merged.mouse.eQTL.min.variance)
merged.mouse.eQTL.min.variance2<-merged.mouse.eQTL.min.variance[, c(1,6:8,2:5)]
head(merged.mouse.eQTL.min.variance2)
##           ensembl_id   liver.beta liver.beta_se liver_residual_variance
## 1 ENSMUSG00000000001 -0.031972222    0.05124934             0.018910763
## 2 ENSMUSG00000000037 -0.007730159    0.02725858             0.004681090
## 3 ENSMUSG00000000049  0.084466667    0.01561692             0.001829162
## 4 ENSMUSG00000000056  0.127208145    0.10296240             0.078095912
## 5 ENSMUSG00000000058 -0.051222222    0.02596463             0.004853968
## 6 ENSMUSG00000000085 -0.072100000    0.03261889             0.007093279
##   lung_pvalue    lung.beta lung.beta_se liver_pvalue
## 1   0.1592383 -0.059800000   0.04174596 5.377718e-01
## 2   0.8635565 -0.005911765   0.03419566 7.788138e-01
## 3   0.8965131 -0.023660714   0.18083869 9.084753e-06
## 4   0.6496243  0.016446429   0.03594950 2.269175e-01
## 5   0.3141176 -0.078136752   0.07671439 5.846922e-02
## 6   0.3630755  0.052847059   0.05748721 3.542074e-02
write.table(merged.mouse.eQTL.min.variance2,file="2016-02-22-merged.mouse.eQTL.min.variance.txt", sep="\t", row.names=FALSE, quote=FALSE)


mouse.liver.expression <- read.table(file="2015-12-07 mouse.liver.expression.eqtl.txt",  header=T)
head(mouse.liver.expression)
##       ProbeSet   BXD1  BXD11  BXD12  BXD13  BXD14  BXD15  BXD16  BXD18
## 1   1415670_at 10.090 10.200 10.300 10.208  9.830 10.238  9.914 10.348
## 2   1415671_at 10.932 11.088 11.007 11.020 10.955 11.120 11.012 11.123
## 3   1415672_at 11.432 11.417 11.442 11.555 11.561 11.318 11.461 11.561
## 4   1415673_at  7.535  7.382  7.566  7.162  7.403  7.342  7.213  7.581
## 5 1415674_a_at  9.757  9.972  9.269  9.873  9.354  9.918  9.459  9.655
## 6   1415675_at  9.029  9.009  9.245  9.282  9.415  9.098  9.060  8.937
##    BXD19   BXD2  BXD20  BXD21  BXD24 BXD24a  BXD27  BXD28  BXD29  BXD31
## 1  9.939  9.871 10.077 10.159  9.746  9.890 10.286 10.177  9.959  9.882
## 2 10.922 10.802 10.988 10.969 11.104 10.979 10.905 11.013 11.071 10.996
## 3 11.575 11.426 11.367 11.328 11.499 11.531 11.666 11.500 11.502 11.446
## 4  7.551  7.368  7.251  7.373  7.408  7.473  7.238  7.424  7.475  7.491
## 5  9.544  9.557  9.460  9.322  9.795  9.758  9.628  9.263  9.671  9.645
## 6  8.995  9.104  9.123  9.086  9.087  9.040  8.844  9.173  8.951  9.131
##    BXD32  BXD33  BXD34  BXD36  BXD38  BXD39  BXD40  BXD42   BXD5   BXD6
## 1 10.059 10.102 10.174 10.022 10.364  9.745 10.074  9.961 10.160 10.069
## 2 10.854 11.084 11.059 10.923 11.053 11.030 11.067 10.891 10.878 11.043
## 3 11.548 11.511 11.490 11.545 11.496 11.516 11.543 11.457 11.374 11.504
## 4  7.419  7.386  7.496  7.369  7.265  7.130  7.149  7.383  7.439  7.233
## 5  9.368  9.741  9.721  9.684  9.387  9.649  9.693  9.793  9.840  9.519
## 6  9.108  9.122  9.153  9.008  9.138  9.126  9.233  9.060  9.134  9.081
##     BXD8   BXD9
## 1  9.956 10.142
## 2 11.206 10.982
## 3 11.531 11.700
## 4  7.397  7.390
## 5  9.340  9.160
## 6  8.926  9.274
mouse.liver.expression.min <- mouse.liver.expression[mouse.liver.expression$ProbeSet %in% liver.mouse.eQTL.min$gene,] 
mouse.liver.expression.min <- merge(mouse.liver.expression.min, liver.mouse.eQTL.min[, c(1:2, 8)], by.x = "ProbeSet", by.y = "gene" )
mouse.liver.expression.min <- mouse.liver.expression.min[order(mouse.liver.expression.min$ensembl_id),]
mouse.liver.expression.min$ensembl_id <- NULL
mouse.liver.expression.min$snps <- NULL
head(mouse.liver.expression.min)
##          ProbeSet   BXD1  BXD11  BXD12  BXD13  BXD14  BXD15  BXD16  BXD18
## 6168   1428645_at 10.890 10.897 10.871 11.098 11.036 10.888 10.939 11.083
## 2852 1421514_a_at  6.437  6.281  6.259  6.405  6.352  6.406  6.361  6.309
## 514    1416677_at 14.754 14.599 14.656 14.724 14.700 14.626 14.700 14.695
## 9246   1451678_at  9.204  9.297  9.205  9.469  9.447  9.178  9.169  9.619
## 5079   1425955_at  6.568  6.601  6.496  6.694  6.577  6.619  6.492  6.693
## 5196 1426241_a_at  7.037  7.110  7.018  6.778  7.086  7.032  7.018  6.929
##       BXD19   BXD2  BXD20  BXD21  BXD24 BXD24a  BXD27  BXD28  BXD29  BXD31
## 6168 10.937 10.770 10.602 10.904 11.075 11.154 11.012 10.978 10.877 10.842
## 2852  6.390  6.433  6.329  6.433  6.295  6.385  6.316  6.366  6.419  6.360
## 514  14.724 14.745 14.693 14.764 14.757 14.695 14.553 14.615 14.665 14.619
## 9246  8.924  9.215  8.641  8.715  9.215  9.344  8.984  9.057  8.817  9.401
## 5079  6.714  6.724  6.790  6.655  6.539  6.500  6.561  6.604  6.715  6.706
## 5196  6.905  6.909  7.203  7.050  7.075  7.022  7.096  6.995  7.042  7.116
##       BXD32  BXD33  BXD34  BXD36  BXD38  BXD39  BXD40  BXD42   BXD5   BXD6
## 6168 11.098 10.953 11.136 10.898 11.067 10.897 10.875 10.644 10.858 11.052
## 2852  6.277  6.230  6.382  6.304  6.237  6.295  6.438  6.471  6.362  6.371
## 514  14.775 14.626 14.634 14.680 14.640 14.620 14.651 14.779 14.696 14.741
## 9246  9.149  8.968  9.031  9.611  9.156  8.923  8.988  9.380  9.788  8.781
## 5079  6.671  6.613  6.679  6.599  6.582  6.561  6.651  6.594  6.627  6.625
## 5196  7.039  7.109  7.049  7.046  7.022  7.095  6.999  7.142  6.899  7.007
##        BXD8   BXD9
## 6168 10.820 11.122
## 2852  6.475  6.391
## 514  14.774 14.715
## 9246  8.905  9.510
## 5079  6.600  6.678
## 5196  7.061  6.853
dim(mouse.liver.expression.min)
## [1] 10422    31
mouse.SNP <- read.table(file="2015-12-07 BXD.geno.SNP.eqtl.for.liver.txt",  header=T)
head(mouse.SNP)
##       Locus BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19 BXD2
## 1 rs6269442    0     0     1     0     0     1     1     0     1    0
## 2 rs6365999    0     0     1     0     0     1     1     0     1    0
## 3 rs6376963    0     0     1     0     0     1     1     0     1    0
## 4 rs3677817    0     0     1     0     0     1     1     0     1    0
## 5 rs8236463    0     1     1     0     0     1     1     0     1    0
## 6 rs6333200    0     1     1     0     0     1     1     0     1    0
##   BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD36
## 1     1     1     0      0     0     1     1     0     0     0     0     0
## 2     1     1     0      0     0     1     1     0     0     0     0     0
## 3     1     1     0      0     0     1     1     0     0     0     0     0
## 4     1     1     0      0     0     1     1     0     0     0     0     0
## 5     1     1     0      0     0     1     1     0     0     0     0     0
## 6     1     1     0      0     0     1     1     0     0     0     0     0
##   BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 1     0     1     0     0    1    1    1    0
## 2     0     1     0     0    1    1    1    0
## 3     0     1     0     0    1    1    1    0
## 4     0     1     0     0    1    1    1    0
## 5     0     1     0     0    1    1    1    0
## 6     0     1     0     0    1    1    1    0
dim(mouse.SNP)
## [1] 3811   31
mouse.SNP.min <- mouse.SNP[mouse.SNP$Locus %in% liver.mouse.eQTL.min$snps, ]
mouse.SNP.min <- merge(liver.mouse.eQTL.min[, c(1:2,8)],mouse.SNP.min, by.x = "snps", by.y = "Locus" )
mouse.SNP.min <- mouse.SNP.min[order(mouse.SNP.min$ensembl_id),]
mouse.SNP.min$gene <- NULL
mouse.SNP.min$ensembl_id <- NULL
head(mouse.SNP.min)
##                  snps BXD1 BXD11 BXD12 BXD13 BXD14 BXD15 BXD16 BXD18 BXD19
## 3096       rs13477321    0     1     1     0     0     1     0     0     0
## 920   CEL-X_154048891    0     0     0     1     0     1     0     0     1
## 6703        rs3670642    1     0     0     0     0     0     0     1     1
## 455  CEL-11_121219118    1     0     1     1     0     1     1     0     0
## 2102  NAT_6_18.199327    0     1     1     1     1     0     1     1     0
## 655   CEL-4_120039566    1     0     1     1     1     1     1     1     1
##      BXD2 BXD20 BXD21 BXD24 BXD24a BXD27 BXD28 BXD29 BXD31 BXD32 BXD33
## 3096    1     0     1     0      0     0     1     0     0     0     1
## 920     0     0     0     1      1     1     0     0     0     1     0
## 6703    1     1     1     1      1     0     0     0     0     1     0
## 455     1     0     1     1      1     1     0     0     0     1     0
## 2102    0     0     0     1      1     1     1     0     0     1     1
## 655     0     0     1     0      0     1     1     1     0     1     1
##      BXD34 BXD36 BXD38 BXD39 BXD40 BXD42 BXD5 BXD6 BXD8 BXD9
## 3096     1     1     1     1     0     0    0    0    1    0
## 920      0     0     1     0     0     1    0    0    0    0
## 6703     1     0     0     0     0     1    1    1    1    1
## 455      0     1     0     0     0     1    1    1    1    1
## 2102     1     0     0     0     1     1    0    1    1    1
## 655      0     1     1     0     1     0    1    0    1    1
dim(mouse.SNP.min)
## [1] 10422    31
write.table(mouse.liver.expression.min,file="2016-04-21-mouse.liver.expression.min.txt", sep="\t", row.names=FALSE, quote=FALSE)
write.table(mouse.SNP.min,file="2016-04-21-mouse.SNP.min.txt", sep="\t", row.names=FALSE, quote=FALSE)