library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts --------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ape)
## Warning: package 'ape' was built under R version 4.0.5
Download the data from the web
# Ensembl release 98 GFF3 annotation for Homo sapiens (GRCh38):
# `d` is the remote directory, `f` the gzip-compressed file name.
d <- "ftp://ftp.ensembl.org/pub/release-98/gff3/homo_sapiens/"
f <- "Homo_sapiens.GRCh38.98.gff3.gz"
# The file is a gzip archive, so request a binary transfer explicitly
# instead of relying on download.file()'s platform-dependent default.
download.file(paste0(d, f), "hs3898_gff3.gz", mode = "wb")
# Parse the downloaded annotation with ape::read.gff().
rhs3898gff3 <- read.gff("hs3898_gff3.gz")
Glimpse the data
# compact overview: dimensions, column types and the first few values
dplyr::glimpse(rhs3898gff3)
## Rows: 2,911,086
## Columns: 9
## $ seqid <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ source <fct> Ensembl, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ type <fct> chromosome, biological_region, biological_region, biolog...
## $ start <int> 1, 10469, 10650, 10655, 10678, 10681, 10707, 10708, 1073...
## $ end <int> 248956422, 11240, 10657, 10657, 10687, 10688, 10716, 107...
## $ score <dbl> NA, 1300.000, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, ...
## $ strand <fct> NA, NA, +, -, +, -, +, -, -, +, +, -, +, -, +, -, +, +, ...
## $ phase <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ attributes <chr> "ID=chromosome:1;Alias=CM000663.2,chr1,NC_000001.11", "e...
# names of the columns (variables) in the data frame
names(rhs3898gff3)
## [1] "seqid" "source" "type" "start" "end"
## [6] "score" "strand" "phase" "attributes"
# show the six leading rows
head(rhs3898gff3, n = 6L)
## seqid source type start end score strand phase
## 1 1 Ensembl chromosome 1 248956422 NA <NA> <NA>
## 2 1 <NA> biological_region 10469 11240 1300.000 <NA> <NA>
## 3 1 <NA> biological_region 10650 10657 0.999 + <NA>
## 4 1 <NA> biological_region 10655 10657 0.999 - <NA>
## 5 1 <NA> biological_region 10678 10687 0.999 + <NA>
## 6 1 <NA> biological_region 10681 10688 0.999 - <NA>
## attributes
## 1 ID=chromosome:1;Alias=CM000663.2,chr1,NC_000001.11
## 2 external_name=oe %3D 0.79;logic_name=cpg
## 3 logic_name=eponine
## 4 logic_name=eponine
## 5 logic_name=eponine
## 6 logic_name=eponine
# show the six trailing rows
tail(rhs3898gff3, n = 6L)
## seqid source type start end score strand
## 2911081 Y havana exon 26626520 26627159 NA -
## 2911082 Y <NA> biological_region 26626966 26627137 0.994 -
## 2911083 Y <NA> biological_region 26627457 26628186 0.997 +
## 2911084 Y havana pseudogene 56855244 56855488 NA +
## 2911085 Y havana pseudogenic_transcript 56855244 56855488 NA +
## 2911086 Y havana exon 56855244 56855488 NA +
## phase
## 2911081 <NA>
## 2911082 <NA>
## 2911083 <NA>
## 2911084 <NA>
## 2911085 <NA>
## 2911086 <NA>
## attributes
## 2911081 Parent=transcript:ENST00000435741;Name=ENSE00001616687;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001616687;rank=1;version=1
## 2911082 external_name=rank %3D 1;logic_name=firstef
## 2911083 external_name=rank %3D 1;logic_name=firstef
## 2911084 ID=gene:ENSG00000235857;Name=CTBP2P1;biotype=processed_pseudogene;description=C-terminal binding protein 2 pseudogene 1 [Source:HGNC Symbol%3BAcc:HGNC:23940];gene_id=ENSG00000235857;logic_name=havana_homo_sapiens;version=1
## 2911085 ID=transcript:ENST00000431853;Parent=gene:ENSG00000235857;Name=CTBP2P1-201;biotype=processed_pseudogene;tag=basic;transcript_id=ENST00000431853;transcript_support_level=NA;version=1
## 2911086 Parent=transcript:ENST00000431853;Name=ENSE00001794473;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001794473;rank=1;version=1
# distinct seqid values, i.e. the chromosomes and contigs present in the file
unique(rhs3898gff3[["seqid"]])
## [1] 1 10 11 12 13 14
## [7] 15 16 17 18 19 2
## [13] 20 21 22 3 4 5
## [19] 6 7 8 9 GL000008.2 GL000009.2
## [25] GL000194.1 GL000195.1 GL000205.2 GL000208.1 GL000213.1 GL000214.1
## [31] GL000216.2 GL000218.1 GL000219.1 GL000220.1 GL000221.1 GL000224.1
## [37] GL000225.1 GL000226.1 KI270302.1 KI270303.1 KI270304.1 KI270305.1
## [43] KI270310.1 KI270311.1 KI270312.1 KI270315.1 KI270316.1 KI270317.1
## [49] KI270320.1 KI270322.1 KI270329.1 KI270330.1 KI270333.1 KI270334.1
## [55] KI270335.1 KI270336.1 KI270337.1 KI270338.1 KI270340.1 KI270362.1
## [61] KI270363.1 KI270364.1 KI270366.1 KI270371.1 KI270372.1 KI270373.1
## [67] KI270374.1 KI270375.1 KI270376.1 KI270378.1 KI270379.1 KI270381.1
## [73] KI270382.1 KI270383.1 KI270384.1 KI270385.1 KI270386.1 KI270387.1
## [79] KI270388.1 KI270389.1 KI270390.1 KI270391.1 KI270392.1 KI270393.1
## [85] KI270394.1 KI270395.1 KI270396.1 KI270411.1 KI270412.1 KI270414.1
## [91] KI270417.1 KI270418.1 KI270419.1 KI270420.1 KI270422.1 KI270423.1
## [97] KI270424.1 KI270425.1 KI270429.1 KI270435.1 KI270438.1 KI270442.1
## [103] KI270448.1 KI270465.1 KI270466.1 KI270467.1 KI270468.1 KI270507.1
## [109] KI270508.1 KI270509.1 KI270510.1 KI270511.1 KI270512.1 KI270515.1
## [115] KI270516.1 KI270517.1 KI270518.1 KI270519.1 KI270521.1 KI270522.1
## [121] KI270528.1 KI270529.1 KI270530.1 KI270538.1 KI270539.1 KI270544.1
## [127] KI270548.1 KI270579.1 KI270580.1 KI270581.1 KI270582.1 KI270583.1
## [133] KI270584.1 KI270587.1 KI270588.1 KI270589.1 KI270590.1 KI270591.1
## [139] KI270593.1 KI270706.1 KI270707.1 KI270708.1 KI270709.1 KI270710.1
## [145] KI270711.1 KI270712.1 KI270713.1 KI270714.1 KI270715.1 KI270716.1
## [151] KI270717.1 KI270718.1 KI270719.1 KI270720.1 KI270721.1 KI270722.1
## [157] KI270723.1 KI270724.1 KI270725.1 KI270726.1 KI270727.1 KI270728.1
## [163] KI270729.1 KI270730.1 KI270731.1 KI270732.1 KI270733.1 KI270734.1
## [169] KI270735.1 KI270736.1 KI270737.1 KI270738.1 KI270739.1 KI270740.1
## [175] KI270741.1 KI270742.1 KI270743.1 KI270744.1 KI270745.1 KI270746.1
## [181] KI270747.1 KI270748.1 KI270749.1 KI270750.1 KI270751.1 KI270752.1
## [187] KI270753.1 KI270754.1 KI270755.1 KI270756.1 KI270757.1 MT
## [193] X Y
## 194 Levels: 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 3 4 5 6 7 8 ... Y
Frequency of factor variables
# Print a frequency table for each factor column, preceded by a header line.
for (column_name in c("seqid", "source", "type", "strand", "phase")) {
  print(paste("Frequency for", column_name, ":"))
  cat("\n")
  # [[ ]] is used because the column name is held in a variable
  print(table(rhs3898gff3[[column_name]]))
  cat("\n")
}
## [1] "Frequency for seqid :"
##
##
## 1 10 11 12 13 14 15
## 266965 109692 169346 164355 48805 99668 107924
## 16 17 18 19 2 20 21
## 135858 174091 53030 172147 216479 69131 34510
## 22 3 4 5 6 7 8
## 64369 185364 121926 129573 133533 139890 106448
## 9 GL000008.2 GL000009.2 GL000194.1 GL000195.1 GL000205.2 GL000208.1
## 104055 4 9 34 26 20 1
## GL000213.1 GL000214.1 GL000216.2 GL000218.1 GL000219.1 GL000220.1 GL000221.1
## 71 4 23 14 24 408 3
## GL000224.1 GL000225.1 GL000226.1 KI270302.1 KI270303.1 KI270304.1 KI270305.1
## 7 58 1 1 1 1 1
## KI270310.1 KI270311.1 KI270312.1 KI270315.1 KI270316.1 KI270317.1 KI270320.1
## 1 1 1 1 1 1 1
## KI270322.1 KI270329.1 KI270330.1 KI270333.1 KI270334.1 KI270335.1 KI270336.1
## 1 1 1 1 1 1 1
## KI270337.1 KI270338.1 KI270340.1 KI270362.1 KI270363.1 KI270364.1 KI270366.1
## 1 1 1 1 1 1 1
## KI270371.1 KI270372.1 KI270373.1 KI270374.1 KI270375.1 KI270376.1 KI270378.1
## 1 1 1 1 1 1 1
## KI270379.1 KI270381.1 KI270382.1 KI270383.1 KI270384.1 KI270385.1 KI270386.1
## 1 1 1 1 1 1 1
## KI270387.1 KI270388.1 KI270389.1 KI270390.1 KI270391.1 KI270392.1 KI270393.1
## 1 1 1 1 1 1 1
## KI270394.1 KI270395.1 KI270396.1 KI270411.1 KI270412.1 KI270414.1 KI270417.1
## 1 1 1 1 1 1 1
## KI270418.1 KI270419.1 KI270420.1 KI270422.1 KI270423.1 KI270424.1 KI270425.1
## 1 1 1 1 1 1 1
## KI270429.1 KI270435.1 KI270438.1 KI270442.1 KI270448.1 KI270465.1 KI270466.1
## 1 1 1 25 1 1 1
## KI270467.1 KI270468.1 KI270507.1 KI270508.1 KI270509.1 KI270510.1 KI270511.1
## 1 1 1 1 1 1 1
## KI270512.1 KI270515.1 KI270516.1 KI270517.1 KI270518.1 KI270519.1 KI270521.1
## 1 1 1 1 1 1 1
## KI270522.1 KI270528.1 KI270529.1 KI270530.1 KI270538.1 KI270539.1 KI270544.1
## 1 1 1 1 1 2 1
## KI270548.1 KI270579.1 KI270580.1 KI270581.1 KI270582.1 KI270583.1 KI270584.1
## 1 1 1 1 1 1 1
## KI270587.1 KI270588.1 KI270589.1 KI270590.1 KI270591.1 KI270593.1 KI270706.1
## 1 1 1 1 1 1 4
## KI270707.1 KI270708.1 KI270709.1 KI270710.1 KI270711.1 KI270712.1 KI270713.1
## 1 3 46 1 148 353 26
## KI270714.1 KI270715.1 KI270716.1 KI270717.1 KI270718.1 KI270719.1 KI270720.1
## 35 1 1 90 1 9 1
## KI270721.1 KI270722.1 KI270723.1 KI270724.1 KI270725.1 KI270726.1 KI270727.1
## 56 4 10 11 103 18 112
## KI270728.1 KI270729.1 KI270730.1 KI270731.1 KI270732.1 KI270733.1 KI270734.1
## 368 63 1 28 48 422 118
## KI270735.1 KI270736.1 KI270737.1 KI270738.1 KI270739.1 KI270740.1 KI270741.1
## 21 1 1 19 1 1 10
## KI270742.1 KI270743.1 KI270744.1 KI270745.1 KI270746.1 KI270747.1 KI270748.1
## 4 8 18 3 1 45 2
## KI270749.1 KI270750.1 KI270751.1 KI270752.1 KI270753.1 KI270754.1 KI270755.1
## 144 7 3 4 19 22 1
## KI270756.1 KI270757.1 MT X Y
## 1 1 126 92643 7906
##
## [1] "Frequency for source :"
##
##
## ensembl Ensembl ensembl_havana
## 223622 194 637206
## ensembl_havana_tagene havana havana_tagene
## 87 1755580 106213
## insdc mirbase
## 37 5637
##
## [1] "Frequency for type :"
##
##
## biological_region C_gene_segment
## 182510 29
## CDS chromosome
## 762023 25
## D_gene_segment exon
## 41 1371695
## five_prime_UTR gene
## 152699 21487
## J_gene_segment lnc_RNA
## 97 103513
## miRNA mRNA
## 1879 99916
## ncRNA ncRNA_gene
## 2235 23934
## pseudogene pseudogenic_transcript
## 15202 15251
## rRNA scaffold
## 60 169
## scRNA snoRNA
## 50 954
## snRNA three_prime_UTR
## 1915 153974
## tRNA unconfirmed_transcript
## 22 1155
## V_gene_segment vaultRNA_primary_transcript
## 250 1
##
## [1] "Frequency for strand :"
##
##
## - +
## 1415171 1473087
##
## [1] "Frequency for phase :"
##
##
## 0 1 2
## 395573 148733 217717
Detect the presence of the word “nicotinic” in the attributes column
# number of rows whose attributes column contains the word "nicotinic"
# (str_detect() gives one TRUE/FALSE per row, so this counts rows, not
# individual occurrences). The column is referenced by its full name,
# `attributes`, rather than relying on `$` partial matching.
str_detect(rhs3898gff3$attributes, "nicotinic") %>% sum()
## [1] 19
# keep only the attribute strings that contain the word "nicotinic";
# the column is spelled out in full (`attributes`) so the lookup does not
# depend on `$` partial matching
nicotinic_str <- str_subset(rhs3898gff3$attributes, "nicotinic")
nicotinic_str
## [1] "ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6"
## [2] "ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3"
## [3] "ID=gene:ENSG00000261561;Name=AC091304.5;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000261561;logic_name=havana_homo_sapiens;version=1"
## [4] "ID=gene:ENSG00000260444;Name=AC138749.2;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000260444;logic_name=havana_homo_sapiens;version=1"
## [5] "ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18"
## [6] "ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13"
## [7] "ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16"
## [8] "ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12"
## [9] "ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10"
## [10] "ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11"
## [11] "ID=gene:ENSG00000264845;Name=AC119868.1;biotype=transcribed_processed_pseudogene;description=cholinergic receptor%2C nicotinic%2C epsilon (CHRNE) pseudogene;gene_id=ENSG00000264845;logic_name=havana_homo_sapiens;version=2"
## [12] "ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16"
## [13] "ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10"
## [14] "ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13"
## [15] "ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17"
## [16] "ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6"
## [17] "ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13"
## [18] "ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7"
## [19] "ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8"
# highlight every match of "nicotinic" within the selected attribute strings
str_view_all(string = nicotinic_str, pattern = "nicotinic")
Subset the rows from the data frame where the attributes column contains the word “nicotinic”
# rows whose attributes mention "nicotinic"; note that the result also
# includes pseudogene rows, not only genes.
# `attributes` here is the data frame column (resolved by filter()'s data
# masking), referenced by its full name instead of a partial `$` match on
# the global object.
nicotinic <- rhs3898gff3 %>%
  filter(str_detect(attributes, "nicotinic"))
nicotinic
## seqid source type start end score strand phase
## 171564 1 ensembl_havana gene 154567778 154580013 NA + <NA>
## 390056 11 ensembl_havana gene 3665587 3671384 NA - <NA>
## 868251 15 havana pseudogene 28346490 28346647 NA + <NA>
## 868591 15 havana pseudogene 28564986 28565152 NA - <NA>
## 871930 15 ensembl_havana gene 31923438 32173018 NA + <NA>
## 941990 15 ensembl_havana gene 78565520 78595269 NA + <NA>
## 942046 15 ensembl_havana gene 78593052 78621295 NA - <NA>
## 942128 15 ensembl_havana gene 78624111 78727754 NA - <NA>
## 1115037 17 ensembl_havana gene 4897771 4934438 NA - <NA>
## 1124543 17 ensembl_havana gene 7445061 7457710 NA + <NA>
## 1323732 18 havana pseudogene 69469517 69471736 NA + <NA>
## 1646403 2 ensembl_havana gene 174747592 174787935 NA - <NA>
## 1701797 2 ensembl_havana gene 232525993 232536667 NA + <NA>
## 1701932 2 ensembl_havana gene 232539692 232548115 NA + <NA>
## 1783481 20 ensembl_havana gene 63343223 63378401 NA - <NA>
## 2100056 4 ensembl_havana gene 40335333 40355217 NA + <NA>
## 2619636 8 ensembl_havana gene 27459756 27479883 NA - <NA>
## 2634316 8 ensembl_havana gene 42697366 42737407 NA + <NA>
## 2634355 8 ensembl_havana gene 42752620 42796392 NA - <NA>
## attributes
## 171564 ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056 ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 868251 ID=gene:ENSG00000261561;Name=AC091304.5;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000261561;logic_name=havana_homo_sapiens;version=1
## 868591 ID=gene:ENSG00000260444;Name=AC138749.2;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000260444;logic_name=havana_homo_sapiens;version=1
## 871930 ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990 ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046 ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128 ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037 ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543 ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1323732 ID=gene:ENSG00000264845;Name=AC119868.1;biotype=transcribed_processed_pseudogene;description=cholinergic receptor%2C nicotinic%2C epsilon (CHRNE) pseudogene;gene_id=ENSG00000264845;logic_name=havana_homo_sapiens;version=2
## 1646403 ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797 ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932 ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481 ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056 ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636 ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316 ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355 ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
# alternative syntax using grepl() and square brackets yields the same result;
# the column is referenced by its full name, `attributes`
rhs3898gff3[grepl(pattern = "nicotinic", rhs3898gff3$attributes), ]
## seqid source type start end score strand phase
## 171564 1 ensembl_havana gene 154567778 154580013 NA + <NA>
## 390056 11 ensembl_havana gene 3665587 3671384 NA - <NA>
## 868251 15 havana pseudogene 28346490 28346647 NA + <NA>
## 868591 15 havana pseudogene 28564986 28565152 NA - <NA>
## 871930 15 ensembl_havana gene 31923438 32173018 NA + <NA>
## 941990 15 ensembl_havana gene 78565520 78595269 NA + <NA>
## 942046 15 ensembl_havana gene 78593052 78621295 NA - <NA>
## 942128 15 ensembl_havana gene 78624111 78727754 NA - <NA>
## 1115037 17 ensembl_havana gene 4897771 4934438 NA - <NA>
## 1124543 17 ensembl_havana gene 7445061 7457710 NA + <NA>
## 1323732 18 havana pseudogene 69469517 69471736 NA + <NA>
## 1646403 2 ensembl_havana gene 174747592 174787935 NA - <NA>
## 1701797 2 ensembl_havana gene 232525993 232536667 NA + <NA>
## 1701932 2 ensembl_havana gene 232539692 232548115 NA + <NA>
## 1783481 20 ensembl_havana gene 63343223 63378401 NA - <NA>
## 2100056 4 ensembl_havana gene 40335333 40355217 NA + <NA>
## 2619636 8 ensembl_havana gene 27459756 27479883 NA - <NA>
## 2634316 8 ensembl_havana gene 42697366 42737407 NA + <NA>
## 2634355 8 ensembl_havana gene 42752620 42796392 NA - <NA>
## attributes
## 171564 ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056 ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 868251 ID=gene:ENSG00000261561;Name=AC091304.5;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000261561;logic_name=havana_homo_sapiens;version=1
## 868591 ID=gene:ENSG00000260444;Name=AC138749.2;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000260444;logic_name=havana_homo_sapiens;version=1
## 871930 ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990 ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046 ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128 ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037 ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543 ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1323732 ID=gene:ENSG00000264845;Name=AC119868.1;biotype=transcribed_processed_pseudogene;description=cholinergic receptor%2C nicotinic%2C epsilon (CHRNE) pseudogene;gene_id=ENSG00000264845;logic_name=havana_homo_sapiens;version=2
## 1646403 ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797 ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932 ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481 ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056 ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636 ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316 ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355 ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
Subset the rows from the data frame where type is “gene” and the attributes column contains “nicotinic”
# keep rows of type "gene" whose attributes mention "nicotinic"; this drops
# the pseudogene rows matched by the text search alone.
# Both conditions use the data frame's own columns (`type`, `attributes`)
# via filter()'s data masking; comma-separated conditions are combined
# with AND.
nicotinic_gene <- rhs3898gff3 %>%
  filter(type == "gene", str_detect(attributes, "nicotinic"))
nicotinic_gene
## seqid source type start end score strand phase
## 171564 1 ensembl_havana gene 154567778 154580013 NA + <NA>
## 390056 11 ensembl_havana gene 3665587 3671384 NA - <NA>
## 871930 15 ensembl_havana gene 31923438 32173018 NA + <NA>
## 941990 15 ensembl_havana gene 78565520 78595269 NA + <NA>
## 942046 15 ensembl_havana gene 78593052 78621295 NA - <NA>
## 942128 15 ensembl_havana gene 78624111 78727754 NA - <NA>
## 1115037 17 ensembl_havana gene 4897771 4934438 NA - <NA>
## 1124543 17 ensembl_havana gene 7445061 7457710 NA + <NA>
## 1646403 2 ensembl_havana gene 174747592 174787935 NA - <NA>
## 1701797 2 ensembl_havana gene 232525993 232536667 NA + <NA>
## 1701932 2 ensembl_havana gene 232539692 232548115 NA + <NA>
## 1783481 20 ensembl_havana gene 63343223 63378401 NA - <NA>
## 2100056 4 ensembl_havana gene 40335333 40355217 NA + <NA>
## 2619636 8 ensembl_havana gene 27459756 27479883 NA - <NA>
## 2634316 8 ensembl_havana gene 42697366 42737407 NA + <NA>
## 2634355 8 ensembl_havana gene 42752620 42796392 NA - <NA>
## attributes
## 171564 ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056 ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 871930 ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990 ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046 ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128 ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037 ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543 ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1646403 ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797 ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932 ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481 ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056 ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636 ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316 ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355 ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
# attribute strings of the nicotinic gene rows
nicotinic_gene[["attributes"]]
## [1] "ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6"
## [2] "ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3"
## [3] "ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18"
## [4] "ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13"
## [5] "ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16"
## [6] "ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12"
## [7] "ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10"
## [8] "ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11"
## [9] "ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16"
## [10] "ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10"
## [11] "ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13"
## [12] "ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17"
## [13] "ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6"
## [14] "ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13"
## [15] "ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7"
## [16] "ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8"
Split the attributes column to extract additional information
# splitting every attribute string on ";" with simplify = TRUE returns a
# character matrix: one row per gene, one column per "key=value" field
# (7 columns for these genes)
str_split(nicotinic_gene$attributes, pattern = ";", simplify = TRUE)
## [,1] [,2] [,3]
## [1,] "ID=gene:ENSG00000160716" "Name=CHRNB2" "biotype=protein_coding"
## [2,] "ID=gene:ENSG00000129749" "Name=CHRNA10" "biotype=protein_coding"
## [3,] "ID=gene:ENSG00000175344" "Name=CHRNA7" "biotype=protein_coding"
## [4,] "ID=gene:ENSG00000169684" "Name=CHRNA5" "biotype=protein_coding"
## [5,] "ID=gene:ENSG00000080644" "Name=CHRNA3" "biotype=protein_coding"
## [6,] "ID=gene:ENSG00000117971" "Name=CHRNB4" "biotype=protein_coding"
## [7,] "ID=gene:ENSG00000108556" "Name=CHRNE" "biotype=protein_coding"
## [8,] "ID=gene:ENSG00000170175" "Name=CHRNB1" "biotype=protein_coding"
## [9,] "ID=gene:ENSG00000138435" "Name=CHRNA1" "biotype=protein_coding"
## [10,] "ID=gene:ENSG00000135902" "Name=CHRND" "biotype=protein_coding"
## [11,] "ID=gene:ENSG00000196811" "Name=CHRNG" "biotype=protein_coding"
## [12,] "ID=gene:ENSG00000101204" "Name=CHRNA4" "biotype=protein_coding"
## [13,] "ID=gene:ENSG00000174343" "Name=CHRNA9" "biotype=protein_coding"
## [14,] "ID=gene:ENSG00000120903" "Name=CHRNA2" "biotype=protein_coding"
## [15,] "ID=gene:ENSG00000147432" "Name=CHRNB3" "biotype=protein_coding"
## [16,] "ID=gene:ENSG00000147434" "Name=CHRNA6" "biotype=protein_coding"
## [,4]
## [1,] "description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]"
## [2,] "description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]"
## [3,] "description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]"
## [4,] "description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]"
## [5,] "description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]"
## [6,] "description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]"
## [7,] "description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]"
## [8,] "description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]"
## [9,] "description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]"
## [10,] "description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]"
## [11,] "description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]"
## [12,] "description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]"
## [13,] "description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]"
## [14,] "description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]"
## [15,] "description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]"
## [16,] "description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]"
## [,5] [,6]
## [1,] "gene_id=ENSG00000160716" "logic_name=ensembl_havana_gene_homo_sapiens"
## [2,] "gene_id=ENSG00000129749" "logic_name=ensembl_havana_gene_homo_sapiens"
## [3,] "gene_id=ENSG00000175344" "logic_name=ensembl_havana_gene_homo_sapiens"
## [4,] "gene_id=ENSG00000169684" "logic_name=ensembl_havana_gene_homo_sapiens"
## [5,] "gene_id=ENSG00000080644" "logic_name=ensembl_havana_gene_homo_sapiens"
## [6,] "gene_id=ENSG00000117971" "logic_name=ensembl_havana_gene_homo_sapiens"
## [7,] "gene_id=ENSG00000108556" "logic_name=ensembl_havana_gene_homo_sapiens"
## [8,] "gene_id=ENSG00000170175" "logic_name=ensembl_havana_gene_homo_sapiens"
## [9,] "gene_id=ENSG00000138435" "logic_name=ensembl_havana_gene_homo_sapiens"
## [10,] "gene_id=ENSG00000135902" "logic_name=ensembl_havana_gene_homo_sapiens"
## [11,] "gene_id=ENSG00000196811" "logic_name=ensembl_havana_gene_homo_sapiens"
## [12,] "gene_id=ENSG00000101204" "logic_name=ensembl_havana_gene_homo_sapiens"
## [13,] "gene_id=ENSG00000174343" "logic_name=ensembl_havana_gene_homo_sapiens"
## [14,] "gene_id=ENSG00000120903" "logic_name=ensembl_havana_gene_homo_sapiens"
## [15,] "gene_id=ENSG00000147432" "logic_name=ensembl_havana_gene_homo_sapiens"
## [16,] "gene_id=ENSG00000147434" "logic_name=ensembl_havana_gene_homo_sapiens"
## [,7]
## [1,] "version=6"
## [2,] "version=3"
## [3,] "version=18"
## [4,] "version=13"
## [5,] "version=16"
## [6,] "version=12"
## [7,] "version=10"
## [8,] "version=11"
## [9,] "version=16"
## [10,] "version=10"
## [11,] "version=13"
## [12,] "version=17"
## [13,] "version=6"
## [14,] "version=13"
## [15,] "version=7"
## [16,] "version=8"
# same split without simplify: the result is a list holding one character
# vector of "key=value" fields per gene
nic_attr_list <- str_split(nicotinic_gene$attributes, pattern = ";")
nic_attr_list
## [[1]]
## [1] "ID=gene:ENSG00000160716"
## [2] "Name=CHRNB2"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]"
## [5] "gene_id=ENSG00000160716"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=6"
##
## [[2]]
## [1] "ID=gene:ENSG00000129749"
## [2] "Name=CHRNA10"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]"
## [5] "gene_id=ENSG00000129749"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=3"
##
## [[3]]
## [1] "ID=gene:ENSG00000175344"
## [2] "Name=CHRNA7"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]"
## [5] "gene_id=ENSG00000175344"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=18"
##
## [[4]]
## [1] "ID=gene:ENSG00000169684"
## [2] "Name=CHRNA5"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]"
## [5] "gene_id=ENSG00000169684"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=13"
##
## [[5]]
## [1] "ID=gene:ENSG00000080644"
## [2] "Name=CHRNA3"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]"
## [5] "gene_id=ENSG00000080644"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=16"
##
## [[6]]
## [1] "ID=gene:ENSG00000117971"
## [2] "Name=CHRNB4"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]"
## [5] "gene_id=ENSG00000117971"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=12"
##
## [[7]]
## [1] "ID=gene:ENSG00000108556"
## [2] "Name=CHRNE"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]"
## [5] "gene_id=ENSG00000108556"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=10"
##
## [[8]]
## [1] "ID=gene:ENSG00000170175"
## [2] "Name=CHRNB1"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]"
## [5] "gene_id=ENSG00000170175"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=11"
##
## [[9]]
## [1] "ID=gene:ENSG00000138435"
## [2] "Name=CHRNA1"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]"
## [5] "gene_id=ENSG00000138435"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=16"
##
## [[10]]
## [1] "ID=gene:ENSG00000135902"
## [2] "Name=CHRND"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]"
## [5] "gene_id=ENSG00000135902"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=10"
##
## [[11]]
## [1] "ID=gene:ENSG00000196811"
## [2] "Name=CHRNG"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]"
## [5] "gene_id=ENSG00000196811"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=13"
##
## [[12]]
## [1] "ID=gene:ENSG00000101204"
## [2] "Name=CHRNA4"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]"
## [5] "gene_id=ENSG00000101204"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=17"
##
## [[13]]
## [1] "ID=gene:ENSG00000174343"
## [2] "Name=CHRNA9"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]"
## [5] "gene_id=ENSG00000174343"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=6"
##
## [[14]]
## [1] "ID=gene:ENSG00000120903"
## [2] "Name=CHRNA2"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]"
## [5] "gene_id=ENSG00000120903"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=13"
##
## [[15]]
## [1] "ID=gene:ENSG00000147432"
## [2] "Name=CHRNB3"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]"
## [5] "gene_id=ENSG00000147432"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=7"
##
## [[16]]
## [1] "ID=gene:ENSG00000147434"
## [2] "Name=CHRNA6"
## [3] "biotype=protein_coding"
## [4] "description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]"
## [5] "gene_id=ENSG00000147434"
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"
## [7] "version=8"
# row-bind the per-gene field vectors into a data frame and label the
# columns after the attribute keys, in the order they appear in each string
nic_attr_df <- setNames(
  do.call(rbind.data.frame, nic_attr_list),
  c("ID", "Name", "biotype", "description", "gene_id", "logic_name", "version")
)
nic_attr_df
## ID Name biotype
## 1 ID=gene:ENSG00000160716 Name=CHRNB2 biotype=protein_coding
## 2 ID=gene:ENSG00000129749 Name=CHRNA10 biotype=protein_coding
## 3 ID=gene:ENSG00000175344 Name=CHRNA7 biotype=protein_coding
## 4 ID=gene:ENSG00000169684 Name=CHRNA5 biotype=protein_coding
## 5 ID=gene:ENSG00000080644 Name=CHRNA3 biotype=protein_coding
## 6 ID=gene:ENSG00000117971 Name=CHRNB4 biotype=protein_coding
## 7 ID=gene:ENSG00000108556 Name=CHRNE biotype=protein_coding
## 8 ID=gene:ENSG00000170175 Name=CHRNB1 biotype=protein_coding
## 9 ID=gene:ENSG00000138435 Name=CHRNA1 biotype=protein_coding
## 10 ID=gene:ENSG00000135902 Name=CHRND biotype=protein_coding
## 11 ID=gene:ENSG00000196811 Name=CHRNG biotype=protein_coding
## 12 ID=gene:ENSG00000101204 Name=CHRNA4 biotype=protein_coding
## 13 ID=gene:ENSG00000174343 Name=CHRNA9 biotype=protein_coding
## 14 ID=gene:ENSG00000120903 Name=CHRNA2 biotype=protein_coding
## 15 ID=gene:ENSG00000147432 Name=CHRNB3 biotype=protein_coding
## 16 ID=gene:ENSG00000147434 Name=CHRNA6 biotype=protein_coding
## description
## 1 description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]
## 2 description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]
## 3 description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]
## 4 description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]
## 5 description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]
## 6 description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]
## 7 description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]
## 8 description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]
## 9 description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]
## 10 description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]
## 11 description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]
## 12 description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]
## 13 description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]
## 14 description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]
## 15 description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]
## 16 description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]
## gene_id logic_name
## 1 gene_id=ENSG00000160716 logic_name=ensembl_havana_gene_homo_sapiens
## 2 gene_id=ENSG00000129749 logic_name=ensembl_havana_gene_homo_sapiens
## 3 gene_id=ENSG00000175344 logic_name=ensembl_havana_gene_homo_sapiens
## 4 gene_id=ENSG00000169684 logic_name=ensembl_havana_gene_homo_sapiens
## 5 gene_id=ENSG00000080644 logic_name=ensembl_havana_gene_homo_sapiens
## 6 gene_id=ENSG00000117971 logic_name=ensembl_havana_gene_homo_sapiens
## 7 gene_id=ENSG00000108556 logic_name=ensembl_havana_gene_homo_sapiens
## 8 gene_id=ENSG00000170175 logic_name=ensembl_havana_gene_homo_sapiens
## 9 gene_id=ENSG00000138435 logic_name=ensembl_havana_gene_homo_sapiens
## 10 gene_id=ENSG00000135902 logic_name=ensembl_havana_gene_homo_sapiens
## 11 gene_id=ENSG00000196811 logic_name=ensembl_havana_gene_homo_sapiens
## 12 gene_id=ENSG00000101204 logic_name=ensembl_havana_gene_homo_sapiens
## 13 gene_id=ENSG00000174343 logic_name=ensembl_havana_gene_homo_sapiens
## 14 gene_id=ENSG00000120903 logic_name=ensembl_havana_gene_homo_sapiens
## 15 gene_id=ENSG00000147432 logic_name=ensembl_havana_gene_homo_sapiens
## 16 gene_id=ENSG00000147434 logic_name=ensembl_havana_gene_homo_sapiens
## version
## 1 version=6
## 2 version=3
## 3 version=18
## 4 version=13
## 5 version=16
## 6 version=12
## 7 version=10
## 8 version=11
## 9 version=16
## 10 version=10
## 11 version=13
## 12 version=17
## 13 version=6
## 14 version=13
## 15 version=7
## 16 version=8
# append the split attribute columns to the right of the gene table;
# rows are matched by position
nicotinic_gene <- nicotinic_gene %>%
  bind_cols(nic_attr_df)
nicotinic_gene
## seqid source type start end score strand phase
## 171564 1 ensembl_havana gene 154567778 154580013 NA + <NA>
## 390056 11 ensembl_havana gene 3665587 3671384 NA - <NA>
## 871930 15 ensembl_havana gene 31923438 32173018 NA + <NA>
## 941990 15 ensembl_havana gene 78565520 78595269 NA + <NA>
## 942046 15 ensembl_havana gene 78593052 78621295 NA - <NA>
## 942128 15 ensembl_havana gene 78624111 78727754 NA - <NA>
## 1115037 17 ensembl_havana gene 4897771 4934438 NA - <NA>
## 1124543 17 ensembl_havana gene 7445061 7457710 NA + <NA>
## 1646403 2 ensembl_havana gene 174747592 174787935 NA - <NA>
## 1701797 2 ensembl_havana gene 232525993 232536667 NA + <NA>
## 1701932 2 ensembl_havana gene 232539692 232548115 NA + <NA>
## 1783481 20 ensembl_havana gene 63343223 63378401 NA - <NA>
## 2100056 4 ensembl_havana gene 40335333 40355217 NA + <NA>
## 2619636 8 ensembl_havana gene 27459756 27479883 NA - <NA>
## 2634316 8 ensembl_havana gene 42697366 42737407 NA + <NA>
## 2634355 8 ensembl_havana gene 42752620 42796392 NA - <NA>
## attributes
## 171564 ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056 ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 871930 ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990 ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046 ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128 ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037 ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543 ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1646403 ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797 ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932 ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481 ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056 ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636 ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316 ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355 ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
## ID Name biotype
## 171564 ID=gene:ENSG00000160716 Name=CHRNB2 biotype=protein_coding
## 390056 ID=gene:ENSG00000129749 Name=CHRNA10 biotype=protein_coding
## 871930 ID=gene:ENSG00000175344 Name=CHRNA7 biotype=protein_coding
## 941990 ID=gene:ENSG00000169684 Name=CHRNA5 biotype=protein_coding
## 942046 ID=gene:ENSG00000080644 Name=CHRNA3 biotype=protein_coding
## 942128 ID=gene:ENSG00000117971 Name=CHRNB4 biotype=protein_coding
## 1115037 ID=gene:ENSG00000108556 Name=CHRNE biotype=protein_coding
## 1124543 ID=gene:ENSG00000170175 Name=CHRNB1 biotype=protein_coding
## 1646403 ID=gene:ENSG00000138435 Name=CHRNA1 biotype=protein_coding
## 1701797 ID=gene:ENSG00000135902 Name=CHRND biotype=protein_coding
## 1701932 ID=gene:ENSG00000196811 Name=CHRNG biotype=protein_coding
## 1783481 ID=gene:ENSG00000101204 Name=CHRNA4 biotype=protein_coding
## 2100056 ID=gene:ENSG00000174343 Name=CHRNA9 biotype=protein_coding
## 2619636 ID=gene:ENSG00000120903 Name=CHRNA2 biotype=protein_coding
## 2634316 ID=gene:ENSG00000147432 Name=CHRNB3 biotype=protein_coding
## 2634355 ID=gene:ENSG00000147434 Name=CHRNA6 biotype=protein_coding
## description
## 171564 description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]
## 390056 description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]
## 871930 description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]
## 941990 description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]
## 942046 description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]
## 942128 description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]
## 1115037 description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]
## 1124543 description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]
## 1646403 description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]
## 1701797 description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]
## 1701932 description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]
## 1783481 description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]
## 2100056 description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]
## 2619636 description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]
## 2634316 description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]
## 2634355 description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]
## gene_id logic_name
## 171564 gene_id=ENSG00000160716 logic_name=ensembl_havana_gene_homo_sapiens
## 390056 gene_id=ENSG00000129749 logic_name=ensembl_havana_gene_homo_sapiens
## 871930 gene_id=ENSG00000175344 logic_name=ensembl_havana_gene_homo_sapiens
## 941990 gene_id=ENSG00000169684 logic_name=ensembl_havana_gene_homo_sapiens
## 942046 gene_id=ENSG00000080644 logic_name=ensembl_havana_gene_homo_sapiens
## 942128 gene_id=ENSG00000117971 logic_name=ensembl_havana_gene_homo_sapiens
## 1115037 gene_id=ENSG00000108556 logic_name=ensembl_havana_gene_homo_sapiens
## 1124543 gene_id=ENSG00000170175 logic_name=ensembl_havana_gene_homo_sapiens
## 1646403 gene_id=ENSG00000138435 logic_name=ensembl_havana_gene_homo_sapiens
## 1701797 gene_id=ENSG00000135902 logic_name=ensembl_havana_gene_homo_sapiens
## 1701932 gene_id=ENSG00000196811 logic_name=ensembl_havana_gene_homo_sapiens
## 1783481 gene_id=ENSG00000101204 logic_name=ensembl_havana_gene_homo_sapiens
## 2100056 gene_id=ENSG00000174343 logic_name=ensembl_havana_gene_homo_sapiens
## 2619636 gene_id=ENSG00000120903 logic_name=ensembl_havana_gene_homo_sapiens
## 2634316 gene_id=ENSG00000147432 logic_name=ensembl_havana_gene_homo_sapiens
## 2634355 gene_id=ENSG00000147434 logic_name=ensembl_havana_gene_homo_sapiens
## version
## 171564 version=6
## 390056 version=3
## 871930 version=18
## 941990 version=13
## 942046 version=16
## 942128 version=12
## 1115037 version=10
## 1124543 version=11
## 1646403 version=16
## 1701797 version=10
## 1701932 version=13
## 1783481 version=17
## 2100056 version=6
## 2619636 version=13
## 2634316 version=7
## 2634355 version=8
Drop the index column or row names
# discard the row names carried over from the full GFF table so the rows
# are renumbered from 1
rownames(nicotinic_gene) <- NULL
Select or keep the relevant columns
# The split attribute columns (Name, gene_id, description) still hold raw
# "key=value" strings, and no NAME / GENE_ID / DESCRIPTION columns exist yet,
# so selecting them directly would fail. Derive the clean columns first:
#   - strip the leading "key=" prefix from each value
#   - drop the trailing " [Source:...]" citation from the description
# then keep only the coordinate columns plus the three cleaned columns.
chrn_df <- nicotinic_gene %>%
  mutate(
    NAME = str_remove(Name, "^Name="),
    GENE_ID = str_remove(gene_id, "^gene_id="),
    DESCRIPTION = description %>%
      str_remove("^description=") %>%
      str_remove(" \\[Source:.*\\]$")
  ) %>%
  select(seqid, source, type, start, end, strand, NAME, GENE_ID, DESCRIPTION)
chrn_df
## seqid source type start end strand NAME GENE_ID
## 1 1 ensembl_havana gene 154567778 154580013 + CHRNB2 ENSG00000160716
## 2 11 ensembl_havana gene 3665587 3671384 - CHRNA10 ENSG00000129749
## 3 15 ensembl_havana gene 31923438 32173018 + CHRNA7 ENSG00000175344
## 4 15 ensembl_havana gene 78565520 78595269 + CHRNA5 ENSG00000169684
## 5 15 ensembl_havana gene 78593052 78621295 - CHRNA3 ENSG00000080644
## 6 15 ensembl_havana gene 78624111 78727754 - CHRNB4 ENSG00000117971
## 7 17 ensembl_havana gene 4897771 4934438 - CHRNE ENSG00000108556
## 8 17 ensembl_havana gene 7445061 7457710 + CHRNB1 ENSG00000170175
## 9 2 ensembl_havana gene 174747592 174787935 - CHRNA1 ENSG00000138435
## 10 2 ensembl_havana gene 232525993 232536667 + CHRND ENSG00000135902
## 11 2 ensembl_havana gene 232539692 232548115 + CHRNG ENSG00000196811
## 12 20 ensembl_havana gene 63343223 63378401 - CHRNA4 ENSG00000101204
## 13 4 ensembl_havana gene 40335333 40355217 + CHRNA9 ENSG00000174343
## 14 8 ensembl_havana gene 27459756 27479883 - CHRNA2 ENSG00000120903
## 15 8 ensembl_havana gene 42697366 42737407 + CHRNB3 ENSG00000147432
## 16 8 ensembl_havana gene 42752620 42796392 - CHRNA6 ENSG00000147434
## DESCRIPTION
## 1 cholinergic receptor nicotinic beta 2 subunit
## 2 cholinergic receptor nicotinic alpha 10 subunit
## 3 cholinergic receptor nicotinic alpha 7 subunit
## 4 cholinergic receptor nicotinic alpha 5 subunit
## 5 cholinergic receptor nicotinic alpha 3 subunit
## 6 cholinergic receptor nicotinic beta 4 subunit
## 7 cholinergic receptor nicotinic epsilon subunit
## 8 cholinergic receptor nicotinic beta 1 subunit
## 9 cholinergic receptor nicotinic alpha 1 subunit
## 10 cholinergic receptor nicotinic delta subunit
## 11 cholinergic receptor nicotinic gamma subunit
## 12 cholinergic receptor nicotinic alpha 4 subunit
## 13 cholinergic receptor nicotinic alpha 9 subunit
## 14 cholinergic receptor nicotinic alpha 2 subunit
## 15 cholinergic receptor nicotinic beta 3 subunit
## 16 cholinergic receptor nicotinic alpha 6 subunit
Write out or save the data frame as a CSV file for later use
# save the tidy gene table to chrn_df.csv in the working directory
write_csv(chrn_df, file = "chrn_df.csv")
Derive a length column for each gene to see their distribution
# gene length in base pairs (number of nucleotides); the start and end
# positions are both counted, hence end - (start - 1)
chrn_df[["GENE_LENGTH (bp)"]] <- with(chrn_df, end - (start - 1))
chrn_df
## seqid source type start end strand NAME GENE_ID
## 1 1 ensembl_havana gene 154567778 154580013 + CHRNB2 ENSG00000160716
## 2 11 ensembl_havana gene 3665587 3671384 - CHRNA10 ENSG00000129749
## 3 15 ensembl_havana gene 31923438 32173018 + CHRNA7 ENSG00000175344
## 4 15 ensembl_havana gene 78565520 78595269 + CHRNA5 ENSG00000169684
## 5 15 ensembl_havana gene 78593052 78621295 - CHRNA3 ENSG00000080644
## 6 15 ensembl_havana gene 78624111 78727754 - CHRNB4 ENSG00000117971
## 7 17 ensembl_havana gene 4897771 4934438 - CHRNE ENSG00000108556
## 8 17 ensembl_havana gene 7445061 7457710 + CHRNB1 ENSG00000170175
## 9 2 ensembl_havana gene 174747592 174787935 - CHRNA1 ENSG00000138435
## 10 2 ensembl_havana gene 232525993 232536667 + CHRND ENSG00000135902
## 11 2 ensembl_havana gene 232539692 232548115 + CHRNG ENSG00000196811
## 12 20 ensembl_havana gene 63343223 63378401 - CHRNA4 ENSG00000101204
## 13 4 ensembl_havana gene 40335333 40355217 + CHRNA9 ENSG00000174343
## 14 8 ensembl_havana gene 27459756 27479883 - CHRNA2 ENSG00000120903
## 15 8 ensembl_havana gene 42697366 42737407 + CHRNB3 ENSG00000147432
## 16 8 ensembl_havana gene 42752620 42796392 - CHRNA6 ENSG00000147434
## DESCRIPTION GENE_LENGTH (bp)
## 1 cholinergic receptor nicotinic beta 2 subunit 12236
## 2 cholinergic receptor nicotinic alpha 10 subunit 5798
## 3 cholinergic receptor nicotinic alpha 7 subunit 249581
## 4 cholinergic receptor nicotinic alpha 5 subunit 29750
## 5 cholinergic receptor nicotinic alpha 3 subunit 28244
## 6 cholinergic receptor nicotinic beta 4 subunit 103644
## 7 cholinergic receptor nicotinic epsilon subunit 36668
## 8 cholinergic receptor nicotinic beta 1 subunit 12650
## 9 cholinergic receptor nicotinic alpha 1 subunit 40344
## 10 cholinergic receptor nicotinic delta subunit 10675
## 11 cholinergic receptor nicotinic gamma subunit 8424
## 12 cholinergic receptor nicotinic alpha 4 subunit 35179
## 13 cholinergic receptor nicotinic alpha 9 subunit 19885
## 14 cholinergic receptor nicotinic alpha 2 subunit 20128
## 15 cholinergic receptor nicotinic beta 3 subunit 40042
## 16 cholinergic receptor nicotinic alpha 6 subunit 43773
# longest human CHRN gene: keep the row(s) whose length equals the maximum
chrn_df[with(chrn_df, `GENE_LENGTH (bp)` == max(`GENE_LENGTH (bp)`)), ]
## seqid source type start end strand NAME GENE_ID
## 3 15 ensembl_havana gene 31923438 32173018 + CHRNA7 ENSG00000175344
## DESCRIPTION GENE_LENGTH (bp)
## 3 cholinergic receptor nicotinic alpha 7 subunit 249581
# shortest human CHRN gene: keep the row(s) whose length equals the minimum
chrn_df[with(chrn_df, `GENE_LENGTH (bp)` == min(`GENE_LENGTH (bp)`)), ]
## seqid source type start end strand NAME GENE_ID
## 2 11 ensembl_havana gene 3665587 3671384 - CHRNA10 ENSG00000129749
## DESCRIPTION GENE_LENGTH (bp)
## 2 cholinergic receptor nicotinic alpha 10 subunit 5798
Plot the distribution of the gene length variable
# ggplot histogram of gene lengths, using bins 1000 bp wide
chrn_df %>%
  ggplot(aes(x = `GENE_LENGTH (bp)`)) +
  geom_histogram(binwidth = 1000)

# kernel density estimate of gene lengths, drawn as a red curve
chrn_df %>%
  ggplot(aes(x = `GENE_LENGTH (bp)`)) +
  geom_density(colour = "red")

# horizontal ggplot box plot of gene lengths, outlined in red
chrn_df %>%
  ggplot(aes(x = `GENE_LENGTH (bp)`)) +
  geom_boxplot(colour = "red")

# base-R bar plot with genes ordered from shortest to longest
chrn_sorted <- chrn_df[order(chrn_df[["GENE_LENGTH (bp)"]]), ]
barplot(
  height = chrn_sorted[["GENE_LENGTH (bp)"]],
  names.arg = chrn_sorted[["NAME"]]
)
# add the title afterwards so its font can be set (font = 4 is bold italic)
title(main = list("Length of human CHRN genes (bp)", font = 4))

# ggplot bar plot: one bar per gene with height equal to its length;
# gene names on the x axis are rotated 90 degrees so they do not overlap
chrn_df %>%
  ggplot(aes(x = NAME, y = `GENE_LENGTH (bp)`)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
