library("biomaRt")
We will use the genomic assembly GRCh38.p12 (GCF_000001405.38). Below are the gene names, Entrez IDs, chromosomes and coordinates of the human CHRN genes, obtained from NCBI.
CHRNA1/1134: chr2 174747592..174764472 CHRNA2/1135: chr8 27459761..27479296 CHRNA3/1136: chr15 78593052..78621295 CHRNA4/1137: chr20 63343310..63375471 CHRNA5/1138: chr15 78565520..78595269 CHRNA6/8973: chr8 42752620..42768786 CHRNA7/1139: chr15 32030462..32172521 CHRNA9/55584: chr4 40335329..40355217 CHRNA10/57053: chr11 3665587..3673629 CHRNB1/1140: chr17 7445087..7457613 CHRNB2/1141: chr1 154567781..154584708 CHRNB3/1142: chr8 42697376..42737407 CHRNB4/1143: chr15 78623282..78655586 CHRNG/1144: chr2 232539727..232546328 CHRND/1145: chr2 232526160..232537907 CHRNE/1146: chr17 4897769..4905019
# Connect to the ENSEMBL_MART_SNP BioMart and select the human SNP dataset
# (the available marts can be listed with listMarts()).
# NOTE(review): no Ensembl release is pinned here, so results presumably follow
# whatever release the server currently hosts - confirm it matches GRCh38.p12.
hssnpmart <- useMart("ENSEMBL_MART_SNP", dataset = "hsapiens_snp")
# Print the Mart object to confirm which database and dataset are in use.
hssnpmart
## Object of class 'Mart':
## Using the ENSEMBL_MART_SNP BioMart database
## Using the hsapiens_snp dataset
# CHRNA1: fetch the variants the mart returns for chromosome 2,
# positions 174747592-174764472 (ID, alleles, start position, strand).
chrna1_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(2, 174747592, 174764472)
)
# Preview the first rows of the result.
head(chrna1_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1049898959 G/T 174747597 1
## 2 rs966040107 A/C 174747598 1
## 3 rs1452833174 A/G 174747602 1
## 4 rs1345498189 T/C 174747604 1
## 5 rs574135903 T/A 174747625 1
## 6 rs1446457557 C/T 174747644 1
# Number of variants returned for the region.
nrow(chrna1_snps_ncbi)
## [1] 3879
# CHRNA2: fetch the variants the mart returns for chromosome 8,
# positions 27459761-27479296 (ID, alleles, start position, strand).
chrna2_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(8, 27459761, 27479296)
)
# Preview the first rows of the result.
head(chrna2_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1481118481 G/A 27459767 1
## 2 rs548065444 A/G 27459775 1
## 3 rs1197776867 T/G 27459778 1
## 4 rs1470538508 T/- 27459778 1
## 5 rs531828656 T/C 27459780 1
## 6 rs377169301 T/A 27459784 1
# Number of variants returned for the region.
nrow(chrna2_snps_ncbi)
## [1] 4814
# CHRNA3: fetch the variants the mart returns for chromosome 15,
# positions 78593052-78621295 (ID, alleles, start position, strand).
chrna3_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(15, 78593052, 78621295)
)
# Preview the first rows of the result.
head(chrna3_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1231364841 C/T 78593056 1
## 2 rs764603712 A/G 78593058 1
## 3 rs936162811 A/C/G 78593066 1
## 4 rs1053394384 T/C/G 78593067 1
## 5 rs1229349593 G/T 78593068 1
## 6 rs1297830384 C/T 78593069 1
# Number of variants returned for the region.
nrow(chrna3_snps_ncbi)
## [1] 7486
# CHRNA4: fetch the variants the mart returns for chromosome 20,
# positions 63343310-63375471 (ID, alleles, start position, strand).
chrna4_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(20, 63343310, 63375471)
)
# Preview the first rows of the result.
head(chrna4_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1047761205 C/A 63343311 1
## 2 rs1465433121 -/GGCCGCACCTGGGCTCGGCG 63343312 1
## 3 rs45541941 G/A 63343312 1
## 4 rs199851285 C/G/T 63343315 1
## 5 rs918537239 G/A/C 63343316 1
## 6 rs929862981 A/C 63343318 1
# Number of variants returned for the region.
nrow(chrna4_snps_ncbi)
## [1] 10229
# Spot-check a single record from the middle of the result (row 4000).
chrna4_snps_ncbi[4000, ]
## refsnp_id allele chrom_start chrom_strand
## 4000 rs1426688300 G/- 63354194 1
# CHRNA5: fetch the variants the mart returns for chromosome 15,
# positions 78565520-78595269 (ID, alleles, start position, strand).
# The first returned record starts at 78565519, one base before the window.
chrna5_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(15, 78565520, 78595269)
)
# Preview the first rows of the result.
head(chrna5_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1421007694 CACATGCGTCCCGAGCCCGCCAGAA/- 78565519 1
## 2 rs1031751646 A/C 78565520 1
## 3 rs199898580 A/C/G/T 78565522 1
## 4 rs201230605 T/A/C/G 78565523 1
## 5 rs1458245024 -/G 78565524 1
## 6 rs1404853491 C/- 78565525 1
# Number of variants returned for the region.
nrow(chrna5_snps_ncbi)
## [1] 6825
# CHRNA6: fetch the variants the mart returns for chromosome 8,
# positions 42752620-42768786 (ID, alleles, start position, strand).
chrna6_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(8, 42752620, 42768786)
)
# Preview the first rows of the result.
head(chrna6_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs200918981 T/G 42752620 1
## 2 rs879236648 T/- 42752620 1
## 3 rs1286043771 TT/- 42752620 1
## 4 rs1205361758 TTTTTT/- 42752620 1
## 5 rs1437679733 TTTTTTT/- 42752620 1
## 6 rs1272705442 TTTTTTTTTT/- 42752620 1
# Number of variants returned for the region.
nrow(chrna6_snps_ncbi)
## [1] 3906
# CHRNA7: fetch the variants the mart returns for chromosome 15,
# positions 32030462-32172521 (ID, alleles, start position, strand).
chrna7_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(15, 32030462, 32172521)
)
# Preview the first rows of the result.
head(chrna7_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1395355294 G/C 32030463 1
## 2 rs1053312620 C/G/T 32030465 1
## 3 rs1397564673 C/G 32030473 1
## 4 rs1293748195 C/T 32030479 1
## 5 rs1354724692 G/A/C/T 32030480 1
## 6 rs1436240101 A/C 32030488 1
# Number of variants returned for the region.
nrow(chrna7_snps_ncbi)
## [1] 31342
# CHRNA9: fetch the variants the mart returns for chromosome 4,
# positions 40335329-40355217 (ID, alleles, start position, strand).
chrna9_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(4, 40335329, 40355217)
)
# Preview the first rows of the result.
head(chrna9_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs542603027 A/T 40335329 1
## 2 rs971463142 G/A 40335331 1
## 3 rs1487486046 C/G 40335332 1
## 4 rs1262986819 T/C 40335335 1
## 5 rs562285535 C/T 40335347 1
## 6 rs1449287441 T/C 40335348 1
# Number of variants returned for the region.
nrow(chrna9_snps_ncbi)
## [1] 4834
# CHRNA10: fetch the variants the mart returns for chromosome 11,
# positions 3665587-3673629 (ID, alleles, start position, strand).
# These are the CHRNA10 coordinates listed at the top of the file.
chrna10_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(11, 3665587, 3673629)
)
# Preview the first rows of the result.
head(chrna10_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1161340529 T/C 3665594 1
## 2 rs1406749346 C/T 3665596 1
## 3 rs1226800647 A/T 3665597 1
## 4 rs1270903945 C/T 3665598 1
## 5 rs1052619614 G/A 3665599 1
## 6 rs1250310691 CTTA/- 3665600 1
# Number of variants returned for the region.
nrow(chrna10_snps_ncbi)
## [1] 2227
# CHRNB1: fetch the variants the mart returns for chromosome 17,
# positions 7445087-7457613 (ID, alleles, start position, strand).
chrnb1_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(17, 7445087, 7457613)
)
# Preview the first rows of the result.
head(chrnb1_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1363140679 C/A 7445089 1
## 2 rs1471339400 G/A 7445090 1
## 3 rs773897169 C/T 7445092 1
## 4 rs759352782 C/G 7445096 1
## 5 rs767254632 T/G 7445097 1
## 6 rs1469018234 G/- 7445100 1
# Number of variants returned for the region.
nrow(chrnb1_snps_ncbi)
## [1] 3371
# CHRNB2: fetch the variants the mart returns for chromosome 1,
# positions 154567781-154584708 (ID, alleles, start position, strand).
chrnb2_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(1, 154567781, 154584708)
)
# Preview the first rows of the result.
head(chrnb2_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1277029869 C/T 154567784 1
## 2 rs1278820939 C/T 154567786 1
## 3 rs1431319843 C/G 154567788 1
## 4 rs555605813 C/A/G 154567790 1
## 5 rs1331904183 C/T 154567792 1
## 6 rs934191502 A/C 154567793 1
# Number of variants returned for the region.
nrow(chrnb2_snps_ncbi)
## [1] 4231
# CHRNB3: fetch the variants the mart returns for chromosome 8,
# positions 42697376-42737407 (ID, alleles, start position, strand).
chrnb3_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(8, 42697376, 42737407)
)
# Preview the first rows of the result.
head(chrnb3_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs949137347 A/G 42697378 1
## 2 rs1042148264 C/A 42697381 1
## 3 rs192526511 G/A 42697382 1
## 4 rs936317953 C/T 42697384 1
## 5 rs546877649 G/A 42697385 1
## 6 rs41272375 G/C 42697387 1
# Number of variants returned for the region.
nrow(chrnb3_snps_ncbi)
## [1] 9167
# CHRNB4: fetch the variants the mart returns for chromosome 15,
# positions 78623282-78655586 (ID, alleles, start position, strand).
chrnb4_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(15, 78623282, 78655586)
)
# Preview the first rows of the result.
head(chrnb4_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1238040072 A/C 78623282 1
## 2 rs1163579592 G/C 78623289 1
## 3 rs562951822 C/A 78623305 1
## 4 rs530387488 C/T 78623315 1
## 5 rs556173595 G/A 78623316 1
## 6 rs541966206 G/- 78623316 1
# Number of variants returned for the region.
nrow(chrnb4_snps_ncbi)
## [1] 7799
# CHRNG: fetch the variants the mart returns for chromosome 2,
# positions 232539727-232546328 (ID, alleles, start position, strand).
chrng_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(2, 232539727, 232546328)
)
# Preview the first rows of the result.
head(chrng_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1321657427 C/A 232539731 1
## 2 rs369418719 G/C 232539735 1
## 3 rs1345635809 G/A 232539737 1
## 4 rs1194959452 C/T 232539738 1
## 5 rs186758675 T/A 232539739 1
## 6 rs1231421076 C/T 232539746 1
# Number of variants returned for the region.
nrow(chrng_snps_ncbi)
## [1] 2150
# CHRND: fetch the variants the mart returns for chromosome 2,
# positions 232526160-232537907 (ID, alleles, start position, strand).
chrnd_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(2, 232526160, 232537907)
)
# Preview the first rows of the result.
head(chrnd_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1282550557 A/C 232526165 1
## 2 rs1240062383 C/T 232526166 1
## 3 rs367844338 G/A 232526168 1
## 4 rs778460094 C/T 232526169 1
## 5 rs747494704 G/A 232526173 1
## 6 rs771477279 C/T 232526178 1
# Number of variants returned for the region.
nrow(chrnd_snps_ncbi)
## [1] 3151
# CHRNE: fetch the variants the mart returns for chromosome 17,
# positions 4897769-4905019 (ID, alleles, start position, strand).
# The first returned record starts at 4897763, a few bases before the window.
chrne_snps_ncbi <- getBM(
  mart = hssnpmart,
  attributes = c("refsnp_id", "allele", "chrom_start", "chrom_strand"),
  filters = c("chr_name", "start", "end"),
  values = list(17, 4897769, 4905019)
)
# Preview the first rows of the result.
head(chrne_snps_ncbi)
## refsnp_id allele chrom_start chrom_strand
## 1 rs1237659462 TTTCTTCAT/- 4897763 1
## 2 rs1399638221 C/G/T 4897784 1
## 3 rs1413534922 T/C 4897787 1
## 4 rs1407275489 -/T 4897793 1
## 5 rs1299231687 C/A 4897795 1
## 6 rs879032301 C/A 4897796 1
# Number of variants returned for the region.
nrow(chrne_snps_ncbi)
## [1] 2961
# Gather the variant IDs of all sixteen CHRN regions into one data frame with
# one column per gene. Columns keep the default names V1..V16 and follow the
# order of the list below (V1 = CHRNA1, V2 = CHRNA2, ..., V16 = CHRNE).
#
# The per-gene results differ in length (e.g. 3879 IDs for CHRNA1 versus 31342
# for CHRNA7). Passing the raw vectors straight to cbind() recycles the shorter
# ones (R warns "number of rows of result is not a multiple of vector length")
# and fills the table with repeated IDs that were never returned by the query.
# Each vector is therefore padded with NA up to the longest result before
# binding, so the table keeps its shape but contains no recycled entries.
#
# Note: the CHRNA5 and CHRNA3 windows overlap on chromosome 15
# (78593052-78595269), so variants in that stretch can appear in both columns.
CHRNsnps <- local({
  snp_ids <- list(
    chrna1_snps_ncbi$refsnp_id, chrna2_snps_ncbi$refsnp_id,
    chrna3_snps_ncbi$refsnp_id, chrna4_snps_ncbi$refsnp_id,
    chrna5_snps_ncbi$refsnp_id, chrna6_snps_ncbi$refsnp_id,
    chrna7_snps_ncbi$refsnp_id, chrna9_snps_ncbi$refsnp_id,
    chrna10_snps_ncbi$refsnp_id, chrnb1_snps_ncbi$refsnp_id,
    chrnb2_snps_ncbi$refsnp_id, chrnb3_snps_ncbi$refsnp_id,
    chrnb4_snps_ncbi$refsnp_id, chrng_snps_ncbi$refsnp_id,
    chrnd_snps_ncbi$refsnp_id, chrne_snps_ncbi$refsnp_id
  )
  n_rows <- max(lengths(snp_ids))
  # Indexing past the end of a vector yields NA, which pads the shorter ones.
  padded <- lapply(snp_ids, function(ids) ids[seq_len(n_rows)])
  as.data.frame(do.call(cbind, padded))
})
# Preview the first rows of the combined table. Columns V1..V16 hold the
# variant IDs in the order the per-gene vectors were bound: CHRNA1, CHRNA2,
# CHRNA3, CHRNA4, CHRNA5, CHRNA6, CHRNA7, CHRNA9, CHRNA10, CHRNB1, CHRNB2,
# CHRNB3, CHRNB4, CHRNG, CHRND, CHRNE.
head(CHRNsnps)
## V1 V2 V3 V4 V5
## 1 rs1049898959 rs1481118481 rs1231364841 rs1047761205 rs1421007694
## 2 rs966040107 rs548065444 rs764603712 rs1465433121 rs1031751646
## 3 rs1452833174 rs1197776867 rs936162811 rs45541941 rs199898580
## 4 rs1345498189 rs1470538508 rs1053394384 rs199851285 rs201230605
## 5 rs574135903 rs531828656 rs1229349593 rs918537239 rs1458245024
## 6 rs1446457557 rs377169301 rs1297830384 rs929862981 rs1404853491
## V6 V7 V8 V9 V10
## 1 rs200918981 rs1395355294 rs542603027 rs1161340529 rs1363140679
## 2 rs879236648 rs1053312620 rs971463142 rs1406749346 rs1471339400
## 3 rs1286043771 rs1397564673 rs1487486046 rs1226800647 rs773897169
## 4 rs1205361758 rs1293748195 rs1262986819 rs1270903945 rs759352782
## 5 rs1437679733 rs1354724692 rs562285535 rs1052619614 rs767254632
## 6 rs1272705442 rs1436240101 rs1449287441 rs1250310691 rs1469018234
## V11 V12 V13 V14 V15
## 1 rs1277029869 rs949137347 rs1238040072 rs1321657427 rs1282550557
## 2 rs1278820939 rs1042148264 rs1163579592 rs369418719 rs1240062383
## 3 rs1431319843 rs192526511 rs562951822 rs1345635809 rs367844338
## 4 rs555605813 rs936317953 rs530387488 rs1194959452 rs778460094
## 5 rs1331904183 rs546877649 rs556173595 rs186758675 rs747494704
## 6 rs934191502 rs41272375 rs541966206 rs1231421076 rs771477279
## V16
## 1 rs1237659462
## 2 rs1399638221
## 3 rs1413534922
## 4 rs1407275489
## 5 rs1299231687
## 6 rs879032301
# Table dimensions: the row count equals the size of the largest per-gene
# result (31342 rows, the CHRNA7 query); there is one column per gene (16).
dim(CHRNsnps)
## [1] 31342 16
# Per-column frequency summary of the variant IDs.
# NOTE: the output recorded below comes from a run in which the per-gene ID
# vectors (all of different lengths) were bound with plain cbind(), which
# recycles the shorter vectors up to 31342 rows. Counts greater than 1 are
# therefore recycling artifacts, not genuine repeats: e.g. V1 (3879 CHRNA1 IDs)
# shows 9 per ID because 31342 / 3879 is about 8.1, while V7 (CHRNA7, the
# longest vector) shows 1 per ID.
# NOTE(review): identifiers that do not start with "rs" (e.g. CM030013,
# CD144654) are presumably records from a non-dbSNP source - confirm.
summary(CHRNsnps)
## V1 V2 V3
## CM030013 : 9 CM062512 : 7 CR096320 : 5
## CM062511 : 9 CM144682 : 7 rs1000254362: 5
## CM103994 : 9 rs1000347952: 7 rs1000725595: 5
## rs1006100295: 9 rs1000413072: 7 rs1000841933: 5
## rs1006483556: 9 rs1000477519: 7 rs1000874450: 5
## rs1006548565: 9 rs1000645912: 7 rs1002373947: 5
## (Other) :31288 (Other) :31300 (Other) :31312
## V4 V5 V6
## rs1003575215: 4 CD144654 : 5 rs1003560319: 9
## rs1003901384: 4 rs1000249068: 5 rs1004986789: 9
## rs1005066999: 4 rs1000509773: 5 rs1005017818: 9
## rs1005199620: 4 rs1000860563: 5 rs1014947158: 9
## rs1007832495: 4 rs1001005920: 5 rs1024089097: 9
## rs1009196963: 4 rs1001197368: 5 rs1025872777: 9
## (Other) :31318 (Other) :31312 (Other) :31288
## V7 V8 V9
## CM066004 : 1 rs1000054751: 7 rs1002504968: 15
## CR024781 : 1 rs1000770079: 7 rs1008837430: 15
## rs1000024280: 1 rs10008177 : 7 rs1012615008: 15
## rs1000028368: 1 rs10008196 : 7 rs1014845087: 15
## rs1000036085: 1 rs1001299628: 7 rs1014858098: 15
## rs1000110150: 1 rs10015231 : 7 rs1024414778: 15
## (Other) :31336 (Other) :31300 (Other) :31252
## V10 V11 V12
## rs1000896955: 10 CM002752: 8 rs1000002402: 4
## rs1001271364: 10 CM010187: 8 rs1000426782: 4
## rs1001872905: 10 CM055129: 8 rs1000833576: 4
## rs1005338765: 10 CM082552: 8 rs1001159160: 4
## rs1005369927: 10 CM082553: 8 rs1001202370: 4
## rs1007366348: 10 CM113088: 8 rs1001439811: 4
## (Other) :31282 (Other) :31294 (Other) :31318
## V13 V14 V15 V16
## rs1002356327: 5 CD063477 : 15 CD012239: 10 CD002497: 11
## rs1007107072: 5 CD063478 : 15 CD139268: 10 CD002498: 11
## rs1016906591: 5 CD1110243: 15 CM012084: 10 CD022895: 11
## rs1019350570: 5 CD1110244: 15 CM020014: 10 CD030129: 11
## rs1039147527: 5 CI063659 : 15 CM030015: 10 CD030130: 11
## rs1043423152: 5 CI063660 : 15 CM066005: 10 CD030131: 11
## (Other) :31312 (Other) :31252 (Other) :31282 (Other) :31276