rm(list=ls())
setwd("/Volumes/Transcend/Thesis_project/1.2.lung.gene.snp")
library(mouse4302.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist, unsplit
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## Loading required package: org.Mm.eg.db
## Loading required package: DBI
## 
## 
x <- mouse4302ENSEMBL
# Get the entrez gene IDs that are mapped to an Ensembl ID
mapped_genes <- mappedkeys(x)
# class(mapped_genes)
# head(mapped_genes)
# Convert to a list
xx <- toTable(x[mapped_genes])
xxx<-data.frame(unlist(xx))
xxx$ProbeSet<-rownames(xxx)
colnames(xxx)[1] <- "Ensem"
mouseanno<-xx
head(mouseanno)
##       probe_id         ensembl_id
## 1   1417246_at ENSMUSG00000030359
## 2 1421666_a_at ENSMUSG00000020804
## 3   1416936_at ENSMUSG00000025375
## 4   1421839_at ENSMUSG00000015243
## 5   1421840_at ENSMUSG00000015243
## 6   1450392_at ENSMUSG00000015243
# load lung expression data
mouse.lung<-read.csv("GN160_DataAvgAnnot.rev0614.csv", na.strings=c("", "NA"),  header = TRUE,)
head(mouse.lung)
##       ProbeSet Gene.Symbol Chr         Mb Gene.Id Strand.Gene
## 1   1415670_at        Copg   6  87.859681   54161           +
## 2   1415671_at    Atp6v0d1   8 108.048521   11972           -
## 3   1415672_at      Golga7   8  24.351869   57437           -
## 4   1415673_at        Psph   5 130.271465  100678           -
## 5 1415674_a_at     Trappc4   9  44.212489   60409           -
## 6   1415675_at        Dpm2   2  32.428524   13481           +
##   Blat.Mb.Start Blat.Mb.End
## 1     87.859681   87.859682
## 2    108.048521  108.048893
## 3     24.351869   24.352391
## 4    130.271465  130.272427
## 5     44.212489   44.213429
## 6     32.428524   32.428891
##                                               Description
## 1               coatomer protein complex, subunit gamma 1
## 2 ATPase, H+ transporting, lysosomal 38kDa, V0 subunit d1
## 3                golgi autoantigen, golgin subfamily a, 7
## 4                               phosphoserine phosphatase
## 5                  trafficking protein particle complex 4
## 6       dolichol-phosphate (beta-D) mannosyltransferase 2
##                                                                                                                                  Aliases
## 1                                                                                          Copg; AU019265; BC056168; D6Wsu16e; D6Ertd71e
## 2                                                         P39; Ac39; VATX; Vma6; Atp6d; AI267038; P39; VATX; VMA6; ATP6D; ATP6DV; VPATPD
## 3                                     GCP16; R75586; HSPC041; AB041568; MGC30271; GOLGA3AP1; MGC102434; MGC118465; C130038N16Rik; Golga7
## 4                                                                                                      PSP; PSPase; AI480570; Psph; PSPH
## 5 Sbd; Sbdn; TRS23; PTD009; Sdcbp2; HSPC172; AI303617; 1500017G03Rik; Trappc4; SBDN; TRS23; PTD009; CGI-104; HSPC172; SYNBINDIN; TRAPPC4
## 6                                                                                                                    MGC111193; MGC21559
##                                                                                                                                                                                                                                              Blat.Sequence
## 1                                GGCTGATCACATCCAAAAAGTCATGAGGAAACGTTCACCCTGTCTACTATCAAGACACTCGAAGAGGCTGTGGGCAATATTGTGAAGTTCCTGAATGCATCCTTGTGAGAGGTCAGACAAAGTGCCAGAAAACAAGAACACCCACACGCTGCTGCTAGCTGGAGTATTATCTTGTCCAACACTACGTCGAAGTTGTCACCATGCCTGCAAGGAGAGA
## 2     GGAACAGGAATGTCGCAACATCGTATGGATTGCTGAGTGCATACATCCCCATCTTTTAGCATTCTAACTTAAGGTTCCCAAGTGTCTGTGTGCTGTGGTGAAGCCCATCACTTGTCTGACCTGGTGAGGTGTAGCATTGTCCTGGGATTGTTTGCCCAGCTTCCATGACTTCTGGGACTACTCTTAGGCCTAAAGTTTACAGCCACTGACATGTCTAAGACTTGTATGACTTTCATGTCCCTCC
## 3               CAGCTCTGCCCGGTGTGGAAGAAGCTGTCCTAAGCACCTAACTCTAGTTTGTAACCTACTTTAAACTCTGATTCACATACAGTATTCAGGGCACAGACTCAAGTAAGTTTTGGCTTCCGTGTAACTTCAAGTTATCAGCTCTTCTTTATCTCTATACTCTCTTAGTATCGGAATTTCTTAGCTCATTTGTTCCTTTAAAGCCTATTCTAGTCTGTCTGCCTTTTAAACTTGTTG
## 4              AACCAATGTGTTTGCCAATAGGCTGAAGTTCTACTTTATGATGAGATGCAGCCGACAGCCGAGATGGAGCTACGGACATGGAAGCCTCCTCCTGCCTGATGCTTTCATTGGATCAGGCAGCAGGTTAAGGACAACGCCAAGTGGTACATCACTACTTGTAGCTTTCTGAGGTGCACACTTTCGGAGCATTGTGGAGCACGTTGCTGCAAAGCTCAATATCCCAACAACCAATGTG
## 5                                                                                                  ATGTTCCACTCGCTCTTTGCCATTGGTTCCCAGCTATCTCCGGAACAGGGCAGTTCAGGCATTGAGATAAACTGCACTGCTTCCAAACACTGACAGGGATCAAGTTTGTGGTCCTGGCAGACCCTAGGCAAGCTGGAATAGATTCTCTTCT
## 6 AGGCAGCCACACTAGACCACAATTGGCCTGTTTGGGAGAAGGCTCAGTGAAGGTCCAGCAGTTCTCCATATCTCCTCCAGAAAAGGGACCAAGGGTCACAGCTTACAAGATGGTCAAGCTGGCTGCACTGCAGGCTGTTTTCTTCTCAGTTTGACAAGGTGGGTTGGGAAGCCTCTCACTAGGCTGAACATTGCTGGTTGGTCCTGTCCATCTCACAGTCAAATGATGGATTGAGCAGGCCTTCCCGC
##   UniGeneId   OMIM HomoloGeneID B6D2F1 C57BL.6J DBA.2J   BXD1   BXD2
## 1 Mm.258785 615525        56745 10.456   10.449 10.230 10.393 10.459
## 2  Mm.17708 607028         3444 11.671   11.613 11.714 11.528 11.562
## 3 Mm.196269   <NA>         9385 11.781   11.854 11.844 11.786 11.808
## 4 Mm.271784 172480        31245  9.006    8.939  9.163  9.000  8.879
## 5  Mm.29814 610971       105453 10.484   10.535 10.578 10.623 10.462
## 6  Mm.22001 603564        99726 10.027    9.990 10.206 10.062 10.052
##     BXD6   BXD9  BXD12  BXD14  BXD16  BXD19  BXD21  BXD22  BXD25  BXD27
## 1 10.505 10.486 10.474 10.400 10.468 10.358 10.363 10.558 10.456 10.446
## 2 11.616 11.568 11.627 11.719 11.587 11.774 11.725 11.634 11.687 11.619
## 3 11.769 11.883 11.798 11.711 11.833 11.734 11.731 11.897 11.845 11.536
## 4  8.873  8.787  9.074  9.153  9.109  9.165  8.928  9.006  9.040  8.868
## 5 10.441 10.331 10.487 10.330 10.513 10.533 10.445 10.598 10.473 10.510
## 6 10.041 10.019 10.167 10.058 10.155 10.224 10.203 10.103 10.084 10.062
##    BXD31  BXD32  BXD33  BXD34  BXD39  BXD40  BXD42  BXD68  BXD43  BXD45
## 1 10.288 10.481 10.452 10.401 10.153 10.514 10.465 10.383 10.387 10.262
## 2 11.703 11.592 11.602 11.609 11.717 11.598 11.503 11.686 11.637 11.804
## 3 11.560 11.806 11.834 11.808 11.894 11.762 11.741 11.480 11.800 11.712
## 4  8.695  9.096  9.009  9.021  9.032  9.365  9.154  8.807  8.950  8.989
## 5 10.543 10.379 10.495 10.568 10.523 10.485 10.398 10.534 10.386 10.606
## 6 10.124 10.155 10.189 10.122 10.039 10.042  9.988 10.096 10.114 10.133
##    BXD48  BXD50  BXD51  BXD55  BXD56  BXD60  BXD62  BXD65  BXD66  BXD69
## 1 10.473 10.330 10.379 10.410 10.345 10.387 10.456 10.409 10.473 10.485
## 2 11.587 11.697 11.609 11.683 11.779 11.749 11.703 11.737 11.723 11.655
## 3 11.798 11.718 11.859 11.840 11.669 11.715 11.723 11.776 11.631 11.774
## 4  8.886  9.291  8.850  8.830  9.011  9.061  9.070  8.785  8.960  9.020
## 5 10.430 10.376 10.533 10.372 10.402 10.384 10.389 10.421 10.371 10.568
## 6 10.080 10.101  9.978 10.162  9.920 10.019 10.048 10.066  9.995 10.122
##    BXD70  BXD71  BXD73  BXD75  BXD83  BXD84  BXD86  BXD87  BXD89  BXD90
## 1 10.265 10.372 10.150 10.488 10.515 10.461 10.439 10.284 10.384 10.274
## 2 11.735 11.594 11.878 11.558 11.688 11.608 11.712 11.607 11.664 11.856
## 3 11.602 11.652 11.699 11.768 11.899 11.832 11.739 11.885 11.792 11.832
## 4  8.886  8.927  8.936  8.998  8.800  9.005  8.927  8.880  8.772  8.966
## 5 10.622 10.480 10.606 10.484 10.468 10.356 10.460 10.437 10.446 10.436
## 6 10.068 10.134 10.143 10.087 10.039 10.109 10.007 10.026 10.049 10.107
##    BXD96 BALB.cByJ  BXD97  BXD98  BXD99 BXD100 X129X1.SvJ FVB.NJ   LP.J
## 1 10.471    10.385 10.403  9.312 10.481 10.143     10.342 10.511 10.389
## 2 11.633    11.653 11.632 11.884 11.774 11.818     11.516 11.678 11.640
## 3 11.853    11.782 11.849 12.013 11.843 11.751     11.958 11.916 11.846
## 4  8.714     9.514  8.883  8.819  8.907  9.061      9.464  9.148  9.174
## 5 10.428    10.423 10.492 10.798 10.491 10.580     10.435 10.610 10.218
## 6 10.083    10.024 10.123 10.012 10.196 10.082      9.908 10.012 10.044
##    SJL.J WSB.EiJ D2B6F1
## 1 10.056  10.535 10.538
## 2 11.805  11.264 11.604
## 3 11.961  11.997 11.871
## 4  9.074   8.951  9.032
## 5 10.578  10.536 10.418
## 6 10.016  10.018 10.102
# Annotate probest id with ensembl id
mouse.lung<-merge(mouse.lung, mouseanno, by.x = "ProbeSet", by.y="probe_id")
# head(mouse.lung)
mouse.lung$Blat.Mb.Start<-as.numeric(as.character(mouse.lung$Blat.Mb.Start))
## Warning: NAs introduced by coercion
mouse.lung$Blat.Mb.End<-as.numeric(as.character(mouse.lung$Blat.Mb.End))
## Warning: NAs introduced by coercion
# delete genes without location information
mouse.lung<-mouse.lung[!is.na(mouse.lung$Blat.Mb.Start),]
mouse.lung<-mouse.lung[!is.na(mouse.lung$Blat.Mb.End),]
mouse.lung<-mouse.lung[!is.na(mouse.lung$Chr),]
mouse.lung <- mouse.lung[grep("_at", mouse.lung$ProbeSet), ]
mouse.lung<-mouse.lung[rowSums(is.na(mouse.lung)) != ncol(mouse.lung),]
dim(mouse.lung)
## [1] 34470    72
head(mouse.lung)
##       ProbeSet Gene.Symbol Chr         Mb Gene.Id Strand.Gene
## 1   1415670_at        Copg   6  87.859681   54161           +
## 2   1415671_at    Atp6v0d1   8 108.048521   11972           -
## 3   1415672_at      Golga7   8  24.351869   57437           -
## 4   1415673_at        Psph   5 130.271465  100678           -
## 5 1415674_a_at     Trappc4   9  44.212489   60409           -
## 6   1415675_at        Dpm2   2  32.428524   13481           +
##   Blat.Mb.Start Blat.Mb.End
## 1      87.85968    87.85968
## 2     108.04852   108.04889
## 3      24.35187    24.35239
## 4     130.27147   130.27243
## 5      44.21249    44.21343
## 6      32.42852    32.42889
##                                               Description
## 1               coatomer protein complex, subunit gamma 1
## 2 ATPase, H+ transporting, lysosomal 38kDa, V0 subunit d1
## 3                golgi autoantigen, golgin subfamily a, 7
## 4                               phosphoserine phosphatase
## 5                  trafficking protein particle complex 4
## 6       dolichol-phosphate (beta-D) mannosyltransferase 2
##                                                                                                                                  Aliases
## 1                                                                                          Copg; AU019265; BC056168; D6Wsu16e; D6Ertd71e
## 2                                                         P39; Ac39; VATX; Vma6; Atp6d; AI267038; P39; VATX; VMA6; ATP6D; ATP6DV; VPATPD
## 3                                     GCP16; R75586; HSPC041; AB041568; MGC30271; GOLGA3AP1; MGC102434; MGC118465; C130038N16Rik; Golga7
## 4                                                                                                      PSP; PSPase; AI480570; Psph; PSPH
## 5 Sbd; Sbdn; TRS23; PTD009; Sdcbp2; HSPC172; AI303617; 1500017G03Rik; Trappc4; SBDN; TRS23; PTD009; CGI-104; HSPC172; SYNBINDIN; TRAPPC4
## 6                                                                                                                    MGC111193; MGC21559
##                                                                                                                                                                                                                                              Blat.Sequence
## 1                                GGCTGATCACATCCAAAAAGTCATGAGGAAACGTTCACCCTGTCTACTATCAAGACACTCGAAGAGGCTGTGGGCAATATTGTGAAGTTCCTGAATGCATCCTTGTGAGAGGTCAGACAAAGTGCCAGAAAACAAGAACACCCACACGCTGCTGCTAGCTGGAGTATTATCTTGTCCAACACTACGTCGAAGTTGTCACCATGCCTGCAAGGAGAGA
## 2     GGAACAGGAATGTCGCAACATCGTATGGATTGCTGAGTGCATACATCCCCATCTTTTAGCATTCTAACTTAAGGTTCCCAAGTGTCTGTGTGCTGTGGTGAAGCCCATCACTTGTCTGACCTGGTGAGGTGTAGCATTGTCCTGGGATTGTTTGCCCAGCTTCCATGACTTCTGGGACTACTCTTAGGCCTAAAGTTTACAGCCACTGACATGTCTAAGACTTGTATGACTTTCATGTCCCTCC
## 3               CAGCTCTGCCCGGTGTGGAAGAAGCTGTCCTAAGCACCTAACTCTAGTTTGTAACCTACTTTAAACTCTGATTCACATACAGTATTCAGGGCACAGACTCAAGTAAGTTTTGGCTTCCGTGTAACTTCAAGTTATCAGCTCTTCTTTATCTCTATACTCTCTTAGTATCGGAATTTCTTAGCTCATTTGTTCCTTTAAAGCCTATTCTAGTCTGTCTGCCTTTTAAACTTGTTG
## 4              AACCAATGTGTTTGCCAATAGGCTGAAGTTCTACTTTATGATGAGATGCAGCCGACAGCCGAGATGGAGCTACGGACATGGAAGCCTCCTCCTGCCTGATGCTTTCATTGGATCAGGCAGCAGGTTAAGGACAACGCCAAGTGGTACATCACTACTTGTAGCTTTCTGAGGTGCACACTTTCGGAGCATTGTGGAGCACGTTGCTGCAAAGCTCAATATCCCAACAACCAATGTG
## 5                                                                                                  ATGTTCCACTCGCTCTTTGCCATTGGTTCCCAGCTATCTCCGGAACAGGGCAGTTCAGGCATTGAGATAAACTGCACTGCTTCCAAACACTGACAGGGATCAAGTTTGTGGTCCTGGCAGACCCTAGGCAAGCTGGAATAGATTCTCTTCT
## 6 AGGCAGCCACACTAGACCACAATTGGCCTGTTTGGGAGAAGGCTCAGTGAAGGTCCAGCAGTTCTCCATATCTCCTCCAGAAAAGGGACCAAGGGTCACAGCTTACAAGATGGTCAAGCTGGCTGCACTGCAGGCTGTTTTCTTCTCAGTTTGACAAGGTGGGTTGGGAAGCCTCTCACTAGGCTGAACATTGCTGGTTGGTCCTGTCCATCTCACAGTCAAATGATGGATTGAGCAGGCCTTCCCGC
##   UniGeneId   OMIM HomoloGeneID B6D2F1 C57BL.6J DBA.2J   BXD1   BXD2
## 1 Mm.258785 615525        56745 10.456   10.449 10.230 10.393 10.459
## 2  Mm.17708 607028         3444 11.671   11.613 11.714 11.528 11.562
## 3 Mm.196269   <NA>         9385 11.781   11.854 11.844 11.786 11.808
## 4 Mm.271784 172480        31245  9.006    8.939  9.163  9.000  8.879
## 5  Mm.29814 610971       105453 10.484   10.535 10.578 10.623 10.462
## 6  Mm.22001 603564        99726 10.027    9.990 10.206 10.062 10.052
##     BXD6   BXD9  BXD12  BXD14  BXD16  BXD19  BXD21  BXD22  BXD25  BXD27
## 1 10.505 10.486 10.474 10.400 10.468 10.358 10.363 10.558 10.456 10.446
## 2 11.616 11.568 11.627 11.719 11.587 11.774 11.725 11.634 11.687 11.619
## 3 11.769 11.883 11.798 11.711 11.833 11.734 11.731 11.897 11.845 11.536
## 4  8.873  8.787  9.074  9.153  9.109  9.165  8.928  9.006  9.040  8.868
## 5 10.441 10.331 10.487 10.330 10.513 10.533 10.445 10.598 10.473 10.510
## 6 10.041 10.019 10.167 10.058 10.155 10.224 10.203 10.103 10.084 10.062
##    BXD31  BXD32  BXD33  BXD34  BXD39  BXD40  BXD42  BXD68  BXD43  BXD45
## 1 10.288 10.481 10.452 10.401 10.153 10.514 10.465 10.383 10.387 10.262
## 2 11.703 11.592 11.602 11.609 11.717 11.598 11.503 11.686 11.637 11.804
## 3 11.560 11.806 11.834 11.808 11.894 11.762 11.741 11.480 11.800 11.712
## 4  8.695  9.096  9.009  9.021  9.032  9.365  9.154  8.807  8.950  8.989
## 5 10.543 10.379 10.495 10.568 10.523 10.485 10.398 10.534 10.386 10.606
## 6 10.124 10.155 10.189 10.122 10.039 10.042  9.988 10.096 10.114 10.133
##    BXD48  BXD50  BXD51  BXD55  BXD56  BXD60  BXD62  BXD65  BXD66  BXD69
## 1 10.473 10.330 10.379 10.410 10.345 10.387 10.456 10.409 10.473 10.485
## 2 11.587 11.697 11.609 11.683 11.779 11.749 11.703 11.737 11.723 11.655
## 3 11.798 11.718 11.859 11.840 11.669 11.715 11.723 11.776 11.631 11.774
## 4  8.886  9.291  8.850  8.830  9.011  9.061  9.070  8.785  8.960  9.020
## 5 10.430 10.376 10.533 10.372 10.402 10.384 10.389 10.421 10.371 10.568
## 6 10.080 10.101  9.978 10.162  9.920 10.019 10.048 10.066  9.995 10.122
##    BXD70  BXD71  BXD73  BXD75  BXD83  BXD84  BXD86  BXD87  BXD89  BXD90
## 1 10.265 10.372 10.150 10.488 10.515 10.461 10.439 10.284 10.384 10.274
## 2 11.735 11.594 11.878 11.558 11.688 11.608 11.712 11.607 11.664 11.856
## 3 11.602 11.652 11.699 11.768 11.899 11.832 11.739 11.885 11.792 11.832
## 4  8.886  8.927  8.936  8.998  8.800  9.005  8.927  8.880  8.772  8.966
## 5 10.622 10.480 10.606 10.484 10.468 10.356 10.460 10.437 10.446 10.436
## 6 10.068 10.134 10.143 10.087 10.039 10.109 10.007 10.026 10.049 10.107
##    BXD96 BALB.cByJ  BXD97  BXD98  BXD99 BXD100 X129X1.SvJ FVB.NJ   LP.J
## 1 10.471    10.385 10.403  9.312 10.481 10.143     10.342 10.511 10.389
## 2 11.633    11.653 11.632 11.884 11.774 11.818     11.516 11.678 11.640
## 3 11.853    11.782 11.849 12.013 11.843 11.751     11.958 11.916 11.846
## 4  8.714     9.514  8.883  8.819  8.907  9.061      9.464  9.148  9.174
## 5 10.428    10.423 10.492 10.798 10.491 10.580     10.435 10.610 10.218
## 6 10.083    10.024 10.123 10.012 10.196 10.082      9.908 10.012 10.044
##    SJL.J WSB.EiJ D2B6F1         ensembl_id
## 1 10.056  10.535 10.538 ENSMUSG00000030058
## 2 11.805  11.264 11.604 ENSMUSG00000013160
## 3 11.961  11.997 11.871 ENSMUSG00000015341
## 4  9.074   8.951  9.032 ENSMUSG00000029446
## 5 10.578  10.536 10.418 ENSMUSG00000032112
## 6 10.016  10.018 10.102 ENSMUSG00000026810
# Extract gene location information
geneloc<-mouse.lung[, c(1,3,7,8)]
# head(geneloc)
# dim(geneloc)
# convert gene location from mb to bp
geneloc[,3:4]<-geneloc[,3:4] * 1000000
# head(geneloc)
# dim(geneloc)
# write lung gene location information for eqtl analysis
lung.gene.loc <- geneloc
write.table(lung.gene.loc,file="2015-12-07 lung.gene.loc.txt", sep="\t", row.names=FALSE, quote=FALSE)

# head(mouse.lung)
GE<-mouse.lung[, c(1, 18:60,62:65,16, 17)]
# head(GE)
# dim(GE)

# Prepare SNP location for lung eQTL anlaysis
BXD.geno<-read.table(file="BXD-3.geno.txt",  header=T)
# recode SNP, "B to 0, D to 1"
BXD.geno1 <- as.data.frame(sapply(BXD.geno,gsub,pattern="B",replacement="0"))
# BXD.geno2 <- as.data.frame(sapply(BXD.geno1,gsub,pattern="H",replacement="1"))
# BXD.geno3 <- as.data.frame(sapply(BXD.geno2,gsub,pattern="D",replacement="2"))
BXD.geno2 <- as.data.frame(sapply(BXD.geno1,gsub,pattern="H",replacement="NA"))
BXD.geno3 <- as.data.frame(sapply(BXD.geno2,gsub,pattern="D",replacement="1"))
BXD.geno4 <- as.data.frame(sapply(BXD.geno3,gsub,pattern="U",replacement="NA"))
# head(BXD.geno4)
BXD.geno.loc<- BXD.geno4[, c(2, 1, 4)]
# Transform SNP position from Mb to bp
BXD.geno.loc$Mb<-as.numeric(as.character(BXD.geno.loc$Mb))
BXD.geno.loc[, 3]<-BXD.geno.loc[, 3] * 1000000
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:IRanges':
## 
##     desc
## The following object is masked from 'package:S4Vectors':
## 
##     rename
BXD.geno.loc<-rename(BXD.geno.loc, c("Mb"="position"))
BXD.geno.SNP<-BXD.geno4[, c(2, 5:97)]
strain<-unique(colnames(BXD.geno.SNP))
GE1<- GE[ , which(colnames(GE) %in% strain)]
GE2<-GE1[,order(colnames(GE1))]
GE2<-data.frame(cbind(GE$ProbeSet, GE2))

ProbeSet<-geneloc$ProbeSet
length(ProbeSet)
## [1] 34470
GE3<- GE2[GE2$GE.ProbeSet %in% ProbeSet, ]
head(GE3)
##    GE.ProbeSet   BXD1 BXD100  BXD12  BXD14  BXD16  BXD19   BXD2  BXD21
## 1   1415670_at 10.393 10.143 10.474 10.400 10.468 10.358 10.459 10.363
## 2   1415671_at 11.528 11.818 11.627 11.719 11.587 11.774 11.562 11.725
## 3   1415672_at 11.786 11.751 11.798 11.711 11.833 11.734 11.808 11.731
## 4   1415673_at  9.000  9.061  9.074  9.153  9.109  9.165  8.879  8.928
## 5 1415674_a_at 10.623 10.580 10.487 10.330 10.513 10.533 10.462 10.445
## 6   1415675_at 10.062 10.082 10.167 10.058 10.155 10.224 10.052 10.203
##    BXD22  BXD25  BXD27  BXD31  BXD32  BXD33  BXD34  BXD39  BXD40  BXD42
## 1 10.558 10.456 10.446 10.288 10.481 10.452 10.401 10.153 10.514 10.465
## 2 11.634 11.687 11.619 11.703 11.592 11.602 11.609 11.717 11.598 11.503
## 3 11.897 11.845 11.536 11.560 11.806 11.834 11.808 11.894 11.762 11.741
## 4  9.006  9.040  8.868  8.695  9.096  9.009  9.021  9.032  9.365  9.154
## 5 10.598 10.473 10.510 10.543 10.379 10.495 10.568 10.523 10.485 10.398
## 6 10.103 10.084 10.062 10.124 10.155 10.189 10.122 10.039 10.042  9.988
##    BXD43  BXD45  BXD48  BXD50  BXD51  BXD55  BXD56   BXD6  BXD60  BXD62
## 1 10.387 10.262 10.473 10.330 10.379 10.410 10.345 10.505 10.387 10.456
## 2 11.637 11.804 11.587 11.697 11.609 11.683 11.779 11.616 11.749 11.703
## 3 11.800 11.712 11.798 11.718 11.859 11.840 11.669 11.769 11.715 11.723
## 4  8.950  8.989  8.886  9.291  8.850  8.830  9.011  8.873  9.061  9.070
## 5 10.386 10.606 10.430 10.376 10.533 10.372 10.402 10.441 10.384 10.389
## 6 10.114 10.133 10.080 10.101  9.978 10.162  9.920 10.041 10.019 10.048
##    BXD65  BXD66  BXD68  BXD69  BXD70  BXD71  BXD73  BXD75  BXD83  BXD84
## 1 10.409 10.473 10.383 10.485 10.265 10.372 10.150 10.488 10.515 10.461
## 2 11.737 11.723 11.686 11.655 11.735 11.594 11.878 11.558 11.688 11.608
## 3 11.776 11.631 11.480 11.774 11.602 11.652 11.699 11.768 11.899 11.832
## 4  8.785  8.960  8.807  9.020  8.886  8.927  8.936  8.998  8.800  9.005
## 5 10.421 10.371 10.534 10.568 10.622 10.480 10.606 10.484 10.468 10.356
## 6 10.066  9.995 10.096 10.122 10.068 10.134 10.143 10.087 10.039 10.109
##    BXD86  BXD87  BXD89   BXD9  BXD90  BXD98  BXD99
## 1 10.439 10.284 10.384 10.486 10.274  9.312 10.481
## 2 11.712 11.607 11.664 11.568 11.856 11.884 11.774
## 3 11.739 11.885 11.792 11.883 11.832 12.013 11.843
## 4  8.927  8.880  8.772  8.787  8.966  8.819  8.907
## 5 10.460 10.437 10.446 10.331 10.436 10.798 10.491
## 6 10.007 10.026 10.049 10.019 10.107 10.012 10.196
dim(GE3)
## [1] 34470    46
# save lung gene expression data for eqtl analysis
write.table(GE3,file="2015-12-07 mouse.lung.expression.eqtl.txt", sep="\t", row.names=FALSE, quote=FALSE)
#Remove the invelant genotypes 
mouse.lung.expression.eqtl <- GE3
BXD.geno.strain.library<-colnames(mouse.lung.expression.eqtl)
BXD.geno5<-BXD.geno4[, which(colnames(BXD.geno4) %in% BXD.geno.strain.library)]
BXD.geno5<-BXD.geno5[,order(colnames(BXD.geno5))]
BXD.geno5$Locus<-BXD.geno4$Locus
BXD.geno.SNP.eqtl<-BXD.geno5[, c(46, 1:45)]

# save SNP genotype information for eqtl analysis
write.table(BXD.geno.SNP.eqtl,file="2015-12-07 BXD.geno.SNP.eqtl.for.lung.txt", sep="\t", row.names=FALSE, quote=FALSE)
# save SNP position information for eqtl analysis
write.table(BXD.geno.loc,file="2015-12-07 BXD.geno.loc.eqtl.for.lung.txt", sep="\t", row.names=FALSE, quote=FALSE)
head(BXD.geno.loc)
##       Locus Chr position
## 1 rs6269442   1  3482275
## 2 rs6365999   1  4811062
## 3 rs6376963   1  5008089
## 4 rs3677817   1  5176058
## 5 rs8236463   1  5579193
## 6 rs6333200   1  6217921
dim(BXD.geno.loc)
## [1] 3811    3
head(BXD.geno.SNP.eqtl)
##       Locus BXD1 BXD100 BXD12 BXD14 BXD16 BXD19 BXD2 BXD21 BXD22 BXD25
## 1 rs6269442    0      0     1     0     1     1    0     1     1     1
## 2 rs6365999    0      0     1     0     1     1    0     1     1     1
## 3 rs6376963    0      0     1     0     1     1    0     1     1     1
## 4 rs3677817    0      0     1     0     1     1    0     1     1     1
## 5 rs8236463    0      0     1     0     1     1    0     1     1     1
## 6 rs6333200    0      0     1     0     1     1    0     1     1     1
##   BXD27 BXD31 BXD32 BXD33 BXD34 BXD39 BXD40 BXD42 BXD43 BXD45 BXD48 BXD50
## 1     0     0     0     0     0     1     0     0     0     0     0    NA
## 2     0     0     0     0     0     1     0     0     0     0     0    NA
## 3     0     0     0     0     0     1     0     0     1     0     0    NA
## 4     0     0     0     0     0     1     0     0     1     0     0    NA
## 5     0     0     0     0     0     1     0     0     1     0     0    NA
## 6     0     0     0     0     0     1     0     0     1     0     0    NA
##   BXD51 BXD55 BXD56 BXD6 BXD60 BXD62 BXD65 BXD66 BXD68 BXD69 BXD70 BXD71
## 1     0    NA    NA    1     0     1     1     0    NA     0     0     0
## 2     0    NA    NA    1     0     1     1     0    NA     0     0     0
## 3     0    NA    NA    1     0     1     0     0    NA     0     0     0
## 4     0    NA    NA    1     0     1     0     0     1     0     0     0
## 5     0    NA    NA    1     0     1     0     0     1     0     0     0
## 6     0    NA    NA    1     0     1     0     0     1     0     0     0
##   BXD73 BXD75 BXD83 BXD84 BXD86 BXD87 BXD89 BXD9 BXD90 BXD98 BXD99
## 1     1     1    NA     0     1     0     0    0     0     0     0
## 2     1     1    NA     0     1     0     0    0     0     0     0
## 3     1     1    NA     0     1     0     0    0     0     0     0
## 4     1     1    NA     0     1     0     0    0     0     0     0
## 5     1     1    NA     0     1     0     0    0     0     0     0
## 6     1     1    NA     0     1     0     0    0     0     0     0
dim(BXD.geno.SNP.eqtl)
## [1] 3811   46
head(mouse.lung.expression.eqtl)
##    GE.ProbeSet   BXD1 BXD100  BXD12  BXD14  BXD16  BXD19   BXD2  BXD21
## 1   1415670_at 10.393 10.143 10.474 10.400 10.468 10.358 10.459 10.363
## 2   1415671_at 11.528 11.818 11.627 11.719 11.587 11.774 11.562 11.725
## 3   1415672_at 11.786 11.751 11.798 11.711 11.833 11.734 11.808 11.731
## 4   1415673_at  9.000  9.061  9.074  9.153  9.109  9.165  8.879  8.928
## 5 1415674_a_at 10.623 10.580 10.487 10.330 10.513 10.533 10.462 10.445
## 6   1415675_at 10.062 10.082 10.167 10.058 10.155 10.224 10.052 10.203
##    BXD22  BXD25  BXD27  BXD31  BXD32  BXD33  BXD34  BXD39  BXD40  BXD42
## 1 10.558 10.456 10.446 10.288 10.481 10.452 10.401 10.153 10.514 10.465
## 2 11.634 11.687 11.619 11.703 11.592 11.602 11.609 11.717 11.598 11.503
## 3 11.897 11.845 11.536 11.560 11.806 11.834 11.808 11.894 11.762 11.741
## 4  9.006  9.040  8.868  8.695  9.096  9.009  9.021  9.032  9.365  9.154
## 5 10.598 10.473 10.510 10.543 10.379 10.495 10.568 10.523 10.485 10.398
## 6 10.103 10.084 10.062 10.124 10.155 10.189 10.122 10.039 10.042  9.988
##    BXD43  BXD45  BXD48  BXD50  BXD51  BXD55  BXD56   BXD6  BXD60  BXD62
## 1 10.387 10.262 10.473 10.330 10.379 10.410 10.345 10.505 10.387 10.456
## 2 11.637 11.804 11.587 11.697 11.609 11.683 11.779 11.616 11.749 11.703
## 3 11.800 11.712 11.798 11.718 11.859 11.840 11.669 11.769 11.715 11.723
## 4  8.950  8.989  8.886  9.291  8.850  8.830  9.011  8.873  9.061  9.070
## 5 10.386 10.606 10.430 10.376 10.533 10.372 10.402 10.441 10.384 10.389
## 6 10.114 10.133 10.080 10.101  9.978 10.162  9.920 10.041 10.019 10.048
##    BXD65  BXD66  BXD68  BXD69  BXD70  BXD71  BXD73  BXD75  BXD83  BXD84
## 1 10.409 10.473 10.383 10.485 10.265 10.372 10.150 10.488 10.515 10.461
## 2 11.737 11.723 11.686 11.655 11.735 11.594 11.878 11.558 11.688 11.608
## 3 11.776 11.631 11.480 11.774 11.602 11.652 11.699 11.768 11.899 11.832
## 4  8.785  8.960  8.807  9.020  8.886  8.927  8.936  8.998  8.800  9.005
## 5 10.421 10.371 10.534 10.568 10.622 10.480 10.606 10.484 10.468 10.356
## 6 10.066  9.995 10.096 10.122 10.068 10.134 10.143 10.087 10.039 10.109
##    BXD86  BXD87  BXD89   BXD9  BXD90  BXD98  BXD99
## 1 10.439 10.284 10.384 10.486 10.274  9.312 10.481
## 2 11.712 11.607 11.664 11.568 11.856 11.884 11.774
## 3 11.739 11.885 11.792 11.883 11.832 12.013 11.843
## 4  8.927  8.880  8.772  8.787  8.966  8.819  8.907
## 5 10.460 10.437 10.446 10.331 10.436 10.798 10.491
## 6 10.007 10.026 10.049 10.019 10.107 10.012 10.196
dim(mouse.lung.expression.eqtl)
## [1] 34470    46
head(lung.gene.loc)
##       ProbeSet Chr Blat.Mb.Start Blat.Mb.End
## 1   1415670_at   6      87859681    87859682
## 2   1415671_at   8     108048521   108048893
## 3   1415672_at   8      24351869    24352391
## 4   1415673_at   5     130271465   130272427
## 5 1415674_a_at   9      44212489    44213429
## 6   1415675_at   2      32428524    32428891
dim(lung.gene.loc)
## [1] 34470     4