library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts --------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ape)
## Warning: package 'ape' was built under R version 4.0.5

Download the data from the web

# Base URL and file name of the GFF3 annotation to fetch.
d <- "ftp://ftp.ensembl.org/pub/release-98/gff3/homo_sapiens/"

f <- "Homo_sapiens.GRCh38.98.gff3.gz"

# The target is a gzip archive, so request a binary transfer explicitly:
# a text-mode download can corrupt compressed files on Windows.
download.file(paste0(d, f), "hs3898_gff3.gz", mode = "wb")

# Read the downloaded GFF3 file with ape::read.gff().
rhs3898gff3 <- read.gff("hs3898_gff3.gz")

Glimpse the data

# compact overview: dimensions, column types and leading values

rhs3898gff3 %>% glimpse()
## Rows: 2,911,086
## Columns: 9
## $ seqid      <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ source     <fct> Ensembl, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ type       <fct> chromosome, biological_region, biological_region, biolog...
## $ start      <int> 1, 10469, 10650, 10655, 10678, 10681, 10707, 10708, 1073...
## $ end        <int> 248956422, 11240, 10657, 10657, 10687, 10688, 10716, 107...
## $ score      <dbl> NA, 1300.000, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, ...
## $ strand     <fct> NA, NA, +, -, +, -, +, -, -, +, +, -, +, -, +, -, +, +, ...
## $ phase      <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ attributes <chr> "ID=chromosome:1;Alias=CM000663.2,chr1,NC_000001.11", "e...
# names of the columns/variables

names(rhs3898gff3)
## [1] "seqid"      "source"     "type"       "start"      "end"       
## [6] "score"      "strand"     "phase"      "attributes"
# leading six rows of the table (the default for head())

head(rhs3898gff3, n = 6L)
##   seqid  source              type start       end    score strand phase
## 1     1 Ensembl        chromosome     1 248956422       NA   <NA>  <NA>
## 2     1    <NA> biological_region 10469     11240 1300.000   <NA>  <NA>
## 3     1    <NA> biological_region 10650     10657    0.999      +  <NA>
## 4     1    <NA> biological_region 10655     10657    0.999      -  <NA>
## 5     1    <NA> biological_region 10678     10687    0.999      +  <NA>
## 6     1    <NA> biological_region 10681     10688    0.999      -  <NA>
##                                           attributes
## 1 ID=chromosome:1;Alias=CM000663.2,chr1,NC_000001.11
## 2           external_name=oe %3D 0.79;logic_name=cpg
## 3                                 logic_name=eponine
## 4                                 logic_name=eponine
## 5                                 logic_name=eponine
## 6                                 logic_name=eponine
# trailing six rows of the table (the default for tail())

tail(rhs3898gff3, n = 6L)
##         seqid source                   type    start      end score strand
## 2911081     Y havana                   exon 26626520 26627159    NA      -
## 2911082     Y   <NA>      biological_region 26626966 26627137 0.994      -
## 2911083     Y   <NA>      biological_region 26627457 26628186 0.997      +
## 2911084     Y havana             pseudogene 56855244 56855488    NA      +
## 2911085     Y havana pseudogenic_transcript 56855244 56855488    NA      +
## 2911086     Y havana                   exon 56855244 56855488    NA      +
##         phase
## 2911081  <NA>
## 2911082  <NA>
## 2911083  <NA>
## 2911084  <NA>
## 2911085  <NA>
## 2911086  <NA>
##                                                                                                                                                                                                                             attributes
## 2911081                                                                           Parent=transcript:ENST00000435741;Name=ENSE00001616687;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001616687;rank=1;version=1
## 2911082                                                                                                                                                                                    external_name=rank %3D 1;logic_name=firstef
## 2911083                                                                                                                                                                                    external_name=rank %3D 1;logic_name=firstef
## 2911084 ID=gene:ENSG00000235857;Name=CTBP2P1;biotype=processed_pseudogene;description=C-terminal binding protein 2 pseudogene 1 [Source:HGNC Symbol%3BAcc:HGNC:23940];gene_id=ENSG00000235857;logic_name=havana_homo_sapiens;version=1
## 2911085                                          ID=transcript:ENST00000431853;Parent=gene:ENSG00000235857;Name=CTBP2P1-201;biotype=processed_pseudogene;tag=basic;transcript_id=ENST00000431853;transcript_support_level=NA;version=1
## 2911086                                                                           Parent=transcript:ENST00000431853;Name=ENSE00001794473;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001794473;rank=1;version=1
# distinct seqid values: these are the chromosomes and contigs

unique(rhs3898gff3[["seqid"]])
##   [1] 1          10         11         12         13         14        
##   [7] 15         16         17         18         19         2         
##  [13] 20         21         22         3          4          5         
##  [19] 6          7          8          9          GL000008.2 GL000009.2
##  [25] GL000194.1 GL000195.1 GL000205.2 GL000208.1 GL000213.1 GL000214.1
##  [31] GL000216.2 GL000218.1 GL000219.1 GL000220.1 GL000221.1 GL000224.1
##  [37] GL000225.1 GL000226.1 KI270302.1 KI270303.1 KI270304.1 KI270305.1
##  [43] KI270310.1 KI270311.1 KI270312.1 KI270315.1 KI270316.1 KI270317.1
##  [49] KI270320.1 KI270322.1 KI270329.1 KI270330.1 KI270333.1 KI270334.1
##  [55] KI270335.1 KI270336.1 KI270337.1 KI270338.1 KI270340.1 KI270362.1
##  [61] KI270363.1 KI270364.1 KI270366.1 KI270371.1 KI270372.1 KI270373.1
##  [67] KI270374.1 KI270375.1 KI270376.1 KI270378.1 KI270379.1 KI270381.1
##  [73] KI270382.1 KI270383.1 KI270384.1 KI270385.1 KI270386.1 KI270387.1
##  [79] KI270388.1 KI270389.1 KI270390.1 KI270391.1 KI270392.1 KI270393.1
##  [85] KI270394.1 KI270395.1 KI270396.1 KI270411.1 KI270412.1 KI270414.1
##  [91] KI270417.1 KI270418.1 KI270419.1 KI270420.1 KI270422.1 KI270423.1
##  [97] KI270424.1 KI270425.1 KI270429.1 KI270435.1 KI270438.1 KI270442.1
## [103] KI270448.1 KI270465.1 KI270466.1 KI270467.1 KI270468.1 KI270507.1
## [109] KI270508.1 KI270509.1 KI270510.1 KI270511.1 KI270512.1 KI270515.1
## [115] KI270516.1 KI270517.1 KI270518.1 KI270519.1 KI270521.1 KI270522.1
## [121] KI270528.1 KI270529.1 KI270530.1 KI270538.1 KI270539.1 KI270544.1
## [127] KI270548.1 KI270579.1 KI270580.1 KI270581.1 KI270582.1 KI270583.1
## [133] KI270584.1 KI270587.1 KI270588.1 KI270589.1 KI270590.1 KI270591.1
## [139] KI270593.1 KI270706.1 KI270707.1 KI270708.1 KI270709.1 KI270710.1
## [145] KI270711.1 KI270712.1 KI270713.1 KI270714.1 KI270715.1 KI270716.1
## [151] KI270717.1 KI270718.1 KI270719.1 KI270720.1 KI270721.1 KI270722.1
## [157] KI270723.1 KI270724.1 KI270725.1 KI270726.1 KI270727.1 KI270728.1
## [163] KI270729.1 KI270730.1 KI270731.1 KI270732.1 KI270733.1 KI270734.1
## [169] KI270735.1 KI270736.1 KI270737.1 KI270738.1 KI270739.1 KI270740.1
## [175] KI270741.1 KI270742.1 KI270743.1 KI270744.1 KI270745.1 KI270746.1
## [181] KI270747.1 KI270748.1 KI270749.1 KI270750.1 KI270751.1 KI270752.1
## [187] KI270753.1 KI270754.1 KI270755.1 KI270756.1 KI270757.1 MT        
## [193] X          Y         
## 194 Levels: 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 3 4 5 6 7 8 ... Y

Frequency of factor variables

# Print a labelled frequency table for each factor column in turn.
# table() is called with its defaults, so NA entries are not counted.
for (col_name in c("seqid", "source", "type", "strand", "phase")) {
  print(paste("Frequency for", col_name, ":"))
  cat("\n")
  print(table(rhs3898gff3[[col_name]]))
  cat("\n")
}
## [1] "Frequency for seqid :"
## 
## 
##          1         10         11         12         13         14         15 
##     266965     109692     169346     164355      48805      99668     107924 
##         16         17         18         19          2         20         21 
##     135858     174091      53030     172147     216479      69131      34510 
##         22          3          4          5          6          7          8 
##      64369     185364     121926     129573     133533     139890     106448 
##          9 GL000008.2 GL000009.2 GL000194.1 GL000195.1 GL000205.2 GL000208.1 
##     104055          4          9         34         26         20          1 
## GL000213.1 GL000214.1 GL000216.2 GL000218.1 GL000219.1 GL000220.1 GL000221.1 
##         71          4         23         14         24        408          3 
## GL000224.1 GL000225.1 GL000226.1 KI270302.1 KI270303.1 KI270304.1 KI270305.1 
##          7         58          1          1          1          1          1 
## KI270310.1 KI270311.1 KI270312.1 KI270315.1 KI270316.1 KI270317.1 KI270320.1 
##          1          1          1          1          1          1          1 
## KI270322.1 KI270329.1 KI270330.1 KI270333.1 KI270334.1 KI270335.1 KI270336.1 
##          1          1          1          1          1          1          1 
## KI270337.1 KI270338.1 KI270340.1 KI270362.1 KI270363.1 KI270364.1 KI270366.1 
##          1          1          1          1          1          1          1 
## KI270371.1 KI270372.1 KI270373.1 KI270374.1 KI270375.1 KI270376.1 KI270378.1 
##          1          1          1          1          1          1          1 
## KI270379.1 KI270381.1 KI270382.1 KI270383.1 KI270384.1 KI270385.1 KI270386.1 
##          1          1          1          1          1          1          1 
## KI270387.1 KI270388.1 KI270389.1 KI270390.1 KI270391.1 KI270392.1 KI270393.1 
##          1          1          1          1          1          1          1 
## KI270394.1 KI270395.1 KI270396.1 KI270411.1 KI270412.1 KI270414.1 KI270417.1 
##          1          1          1          1          1          1          1 
## KI270418.1 KI270419.1 KI270420.1 KI270422.1 KI270423.1 KI270424.1 KI270425.1 
##          1          1          1          1          1          1          1 
## KI270429.1 KI270435.1 KI270438.1 KI270442.1 KI270448.1 KI270465.1 KI270466.1 
##          1          1          1         25          1          1          1 
## KI270467.1 KI270468.1 KI270507.1 KI270508.1 KI270509.1 KI270510.1 KI270511.1 
##          1          1          1          1          1          1          1 
## KI270512.1 KI270515.1 KI270516.1 KI270517.1 KI270518.1 KI270519.1 KI270521.1 
##          1          1          1          1          1          1          1 
## KI270522.1 KI270528.1 KI270529.1 KI270530.1 KI270538.1 KI270539.1 KI270544.1 
##          1          1          1          1          1          2          1 
## KI270548.1 KI270579.1 KI270580.1 KI270581.1 KI270582.1 KI270583.1 KI270584.1 
##          1          1          1          1          1          1          1 
## KI270587.1 KI270588.1 KI270589.1 KI270590.1 KI270591.1 KI270593.1 KI270706.1 
##          1          1          1          1          1          1          4 
## KI270707.1 KI270708.1 KI270709.1 KI270710.1 KI270711.1 KI270712.1 KI270713.1 
##          1          3         46          1        148        353         26 
## KI270714.1 KI270715.1 KI270716.1 KI270717.1 KI270718.1 KI270719.1 KI270720.1 
##         35          1          1         90          1          9          1 
## KI270721.1 KI270722.1 KI270723.1 KI270724.1 KI270725.1 KI270726.1 KI270727.1 
##         56          4         10         11        103         18        112 
## KI270728.1 KI270729.1 KI270730.1 KI270731.1 KI270732.1 KI270733.1 KI270734.1 
##        368         63          1         28         48        422        118 
## KI270735.1 KI270736.1 KI270737.1 KI270738.1 KI270739.1 KI270740.1 KI270741.1 
##         21          1          1         19          1          1         10 
## KI270742.1 KI270743.1 KI270744.1 KI270745.1 KI270746.1 KI270747.1 KI270748.1 
##          4          8         18          3          1         45          2 
## KI270749.1 KI270750.1 KI270751.1 KI270752.1 KI270753.1 KI270754.1 KI270755.1 
##        144          7          3          4         19         22          1 
## KI270756.1 KI270757.1         MT          X          Y 
##          1          1        126      92643       7906 
## 
## [1] "Frequency for source :"
## 
## 
##               ensembl               Ensembl        ensembl_havana 
##                223622                   194                637206 
## ensembl_havana_tagene                havana         havana_tagene 
##                    87               1755580                106213 
##                 insdc               mirbase 
##                    37                  5637 
## 
## [1] "Frequency for type :"
## 
## 
##           biological_region              C_gene_segment 
##                      182510                          29 
##                         CDS                  chromosome 
##                      762023                          25 
##              D_gene_segment                        exon 
##                          41                     1371695 
##              five_prime_UTR                        gene 
##                      152699                       21487 
##              J_gene_segment                     lnc_RNA 
##                          97                      103513 
##                       miRNA                        mRNA 
##                        1879                       99916 
##                       ncRNA                  ncRNA_gene 
##                        2235                       23934 
##                  pseudogene      pseudogenic_transcript 
##                       15202                       15251 
##                        rRNA                    scaffold 
##                          60                         169 
##                       scRNA                      snoRNA 
##                          50                         954 
##                       snRNA             three_prime_UTR 
##                        1915                      153974 
##                        tRNA      unconfirmed_transcript 
##                          22                        1155 
##              V_gene_segment vaultRNA_primary_transcript 
##                         250                           1 
## 
## [1] "Frequency for strand :"
## 
## 
##       -       + 
## 1415171 1473087 
## 
## [1] "Frequency for phase :"
## 
## 
##      0      1      2 
## 395573 148733 217717

Much of the information is contained in the attributes column

First 50 rows

# Preview the first 50 entries of the `attributes` column. The column name is
# spelled out in full: `$attribute` only resolves through data-frame partial
# matching and would return NULL on a tibble.
head(rhs3898gff3$attributes, n = 50)
##  [1] "ID=chromosome:1;Alias=CM000663.2,chr1,NC_000001.11"                                                                                                                                                                              
##  [2] "external_name=oe %3D 0.79;logic_name=cpg"                                                                                                                                                                                        
##  [3] "logic_name=eponine"                                                                                                                                                                                                              
##  [4] "logic_name=eponine"                                                                                                                                                                                                              
##  [5] "logic_name=eponine"                                                                                                                                                                                                              
##  [6] "logic_name=eponine"                                                                                                                                                                                                              
##  [7] "logic_name=eponine"                                                                                                                                                                                                              
##  [8] "logic_name=eponine"                                                                                                                                                                                                              
##  [9] "logic_name=eponine"                                                                                                                                                                                                              
## [10] "logic_name=eponine"                                                                                                                                                                                                              
## [11] "logic_name=eponine"                                                                                                                                                                                                              
## [12] "logic_name=eponine"                                                                                                                                                                                                              
## [13] "logic_name=eponine"                                                                                                                                                                                                              
## [14] "logic_name=eponine"                                                                                                                                                                                                              
## [15] "logic_name=eponine"                                                                                                                                                                                                              
## [16] "logic_name=eponine"                                                                                                                                                                                                              
## [17] "ID=gene:ENSG00000223972;Name=DDX11L1;biotype=transcribed_unprocessed_pseudogene;description=DEAD/H-box helicase 11 like 1 [Source:HGNC Symbol%3BAcc:HGNC:37102];gene_id=ENSG00000223972;logic_name=havana_homo_sapiens;version=5"
## [18] "ID=transcript:ENST00000456328;Parent=gene:ENSG00000223972;Name=DDX11L1-202;biotype=lncRNA;tag=basic;transcript_id=ENST00000456328;transcript_support_level=1;version=2"                                                          
## [19] "Parent=transcript:ENST00000456328;Name=ENSE00002234944;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002234944;rank=1;version=1"                                                                            
## [20] "Parent=transcript:ENST00000456328;Name=ENSE00003582793;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003582793;rank=2;version=1"                                                                            
## [21] "Parent=transcript:ENST00000456328;Name=ENSE00002312635;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002312635;rank=3;version=1"                                                                            
## [22] "ID=transcript:ENST00000450305;Parent=gene:ENSG00000223972;Name=DDX11L1-201;biotype=transcribed_unprocessed_pseudogene;tag=basic;transcript_id=ENST00000450305;transcript_support_level=NA;version=2"                             
## [23] "Parent=transcript:ENST00000450305;Name=ENSE00001948541;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001948541;rank=1;version=1"                                                                            
## [24] "Parent=transcript:ENST00000450305;Name=ENSE00001671638;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001671638;rank=2;version=2"                                                                            
## [25] "Parent=transcript:ENST00000450305;Name=ENSE00001758273;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001758273;rank=3;version=2"                                                                            
## [26] "Parent=transcript:ENST00000450305;Name=ENSE00001799933;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001799933;rank=4;version=2"                                                                            
## [27] "Parent=transcript:ENST00000450305;Name=ENSE00001746346;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001746346;rank=5;version=2"                                                                            
## [28] "Parent=transcript:ENST00000450305;Name=ENSE00001863096;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001863096;rank=6;version=1"                                                                            
## [29] "ID=gene:ENSG00000227232;Name=WASH7P;biotype=unprocessed_pseudogene;description=WASP family homolog 7%2C pseudogene [Source:HGNC Symbol%3BAcc:HGNC:38034];gene_id=ENSG00000227232;logic_name=havana_homo_sapiens;version=5"       
## [30] "ID=transcript:ENST00000488147;Parent=gene:ENSG00000227232;Name=WASH7P-201;biotype=unprocessed_pseudogene;tag=basic;transcript_id=ENST00000488147;transcript_support_level=NA;version=1"                                          
## [31] "Parent=transcript:ENST00000488147;Name=ENSE00001843071;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001843071;rank=11;version=1"                                                                           
## [32] "Parent=transcript:ENST00000488147;Name=ENSE00001935574;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001935574;rank=10;version=1"                                                                           
## [33] "Parent=transcript:ENST00000488147;Name=ENSE00002030414;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002030414;rank=9;version=1"                                                                            
## [34] "Parent=transcript:ENST00000488147;Name=ENSE00003621279;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003621279;rank=8;version=1"                                                                            
## [35] "Parent=transcript:ENST00000488147;Name=ENSE00003553898;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003553898;rank=7;version=1"                                                                            
## [36] "Parent=transcript:ENST00000488147;Name=ENSE00003502542;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003502542;rank=6;version=1"                                                                            
## [37] "Parent=transcript:ENST00000488147;Name=ENSE00003475637;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003475637;rank=5;version=1"                                                                            
## [38] "Parent=transcript:ENST00000488147;Name=ENSE00003565697;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003565697;rank=4;version=1"                                                                            
## [39] "Parent=transcript:ENST00000488147;Name=ENSE00003477500;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003477500;rank=3;version=1"                                                                            
## [40] "Parent=transcript:ENST00000488147;Name=ENSE00003507205;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003507205;rank=2;version=1"                                                                            
## [41] "Parent=transcript:ENST00000488147;Name=ENSE00001890219;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001890219;rank=1;version=1"                                                                            
## [42] "external_name=rank %3D 1;logic_name=firstef"                                                                                                                                                                                     
## [43] "ID=gene:ENSG00000278267;Name=MIR6859-1;biotype=miRNA;description=microRNA 6859-1 [Source:HGNC Symbol%3BAcc:HGNC:50039];gene_id=ENSG00000278267;logic_name=ncrna_homo_sapiens;version=1"                                          
## [44] "ID=transcript:ENST00000619216;Parent=gene:ENSG00000278267;Name=MIR6859-1-201;biotype=miRNA;tag=basic;transcript_id=ENST00000619216;transcript_support_level=NA;version=1"                                                        
## [45] "Parent=transcript:ENST00000619216;Name=ENSE00003746039;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003746039;rank=1;version=1"                                                                            
## [46] "external_name=oe %3D 0.88;logic_name=cpg"                                                                                                                                                                                        
## [47] "logic_name=eponine"                                                                                                                                                                                                              
## [48] "external_name=rank %3D 1;logic_name=firstef"                                                                                                                                                                                     
## [49] "external_name=rank %3D 1;logic_name=firstef"                                                                                                                                                                                     
## [50] "logic_name=eponine"

Sort the attributes column to glimpse its head and tail

# first 18 values after sorting
# (full column name `attributes`; avoids relying on `$` partial matching)

str_sort(rhs3898gff3$attributes) %>% head(n = 18)
##  [1] "external_name=Ala;logic_name=trnascan"
##  [2] "external_name=Ala;logic_name=trnascan"
##  [3] "external_name=Ala;logic_name=trnascan"
##  [4] "external_name=Ala;logic_name=trnascan"
##  [5] "external_name=Ala;logic_name=trnascan"
##  [6] "external_name=Ala;logic_name=trnascan"
##  [7] "external_name=Ala;logic_name=trnascan"
##  [8] "external_name=Ala;logic_name=trnascan"
##  [9] "external_name=Ala;logic_name=trnascan"
## [10] "external_name=Ala;logic_name=trnascan"
## [11] "external_name=Ala;logic_name=trnascan"
## [12] "external_name=Ala;logic_name=trnascan"
## [13] "external_name=Ala;logic_name=trnascan"
## [14] "external_name=Ala;logic_name=trnascan"
## [15] "external_name=Ala;logic_name=trnascan"
## [16] "external_name=Ala;logic_name=trnascan"
## [17] "external_name=Ala;logic_name=trnascan"
## [18] "external_name=Ala;logic_name=trnascan"
# last 18 values after sorting
# (full column name `attributes`; avoids relying on `$` partial matching)

str_sort(rhs3898gff3$attributes) %>% tail(n = 18)
##  [1] "Parent=transcript:ENST00000673621"                                                                                                                   
##  [2] "Parent=transcript:ENST00000673621;Name=ENSE00001867645;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00001867645;rank=1;version=1"
##  [3] "Parent=transcript:ENST00000673621;Name=ENSE00003463478;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=ENSE00003463478;rank=11;version=1" 
##  [4] "Parent=transcript:ENST00000673621;Name=ENSE00003467356;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=ENSE00003467356;rank=4;version=1"  
##  [5] "Parent=transcript:ENST00000673621;Name=ENSE00003474306;constitutive=0;ensembl_end_phase=2;ensembl_phase=2;exon_id=ENSE00003474306;rank=13;version=1" 
##  [6] "Parent=transcript:ENST00000673621;Name=ENSE00003504263;constitutive=0;ensembl_end_phase=1;ensembl_phase=1;exon_id=ENSE00003504263;rank=3;version=1"  
##  [7] "Parent=transcript:ENST00000673621;Name=ENSE00003531718;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=ENSE00003531718;rank=5;version=1"  
##  [8] "Parent=transcript:ENST00000673621;Name=ENSE00003590938;constitutive=0;ensembl_end_phase=1;ensembl_phase=1;exon_id=ENSE00003590938;rank=15;version=1" 
##  [9] "Parent=transcript:ENST00000673621;Name=ENSE00003629402;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=ENSE00003629402;rank=10;version=1" 
## [10] "Parent=transcript:ENST00000673621;Name=ENSE00003633264;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=ENSE00003633264;rank=8;version=1"  
## [11] "Parent=transcript:ENST00000673621;Name=ENSE00003641299;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=ENSE00003641299;rank=9;version=1"  
## [12] "Parent=transcript:ENST00000673621;Name=ENSE00003652089;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00003652089;rank=6;version=1"  
## [13] "Parent=transcript:ENST00000673621;Name=ENSE00003655053;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00003655053;rank=12;version=1" 
## [14] "Parent=transcript:ENST00000673621;Name=ENSE00003655244;constitutive=0;ensembl_end_phase=1;ensembl_phase=2;exon_id=ENSE00003655244;rank=14;version=1" 
## [15] "Parent=transcript:ENST00000673621;Name=ENSE00003678507;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=ENSE00003678507;rank=16;version=1" 
## [16] "Parent=transcript:ENST00000673621;Name=ENSE00003694154;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00003694154;rank=7;version=1"  
## [17] "Parent=transcript:ENST00000673621;Name=ENSE00003895587;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=ENSE00003895587;rank=2;version=1" 
## [18] "Parent=transcript:ENST00000673621;Name=ENSE00003895808;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00003895808;rank=17;version=1"

Detect the presence of the word “nicotinic” in the attributes column

# how many entries mention the word "nicotinic"
# (str_detect() yields one TRUE/FALSE per entry, so the sum counts matching
# entries, not individual occurrences; the column name is written in full
# instead of relying on `$` partial matching)

str_detect(rhs3898gff3$attributes, "nicotinic") %>% sum()
## [1] 19
# keep only the attribute strings that contain the word "nicotinic"
# (column name written in full instead of relying on `$` partial matching)

nicotinic_str <- str_subset(rhs3898gff3$attributes, "nicotinic")

# display the matching strings
nicotinic_str
##  [1] "ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6"    
##  [2] "ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3"
##  [3] "ID=gene:ENSG00000261561;Name=AC091304.5;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000261561;logic_name=havana_homo_sapiens;version=1"                        
##  [4] "ID=gene:ENSG00000260444;Name=AC138749.2;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000260444;logic_name=havana_homo_sapiens;version=1"                        
##  [5] "ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18"  
##  [6] "ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13"  
##  [7] "ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16"  
##  [8] "ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12"   
##  [9] "ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10"   
## [10] "ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11"   
## [11] "ID=gene:ENSG00000264845;Name=AC119868.1;biotype=transcribed_processed_pseudogene;description=cholinergic receptor%2C nicotinic%2C epsilon (CHRNE) pseudogene;gene_id=ENSG00000264845;logic_name=havana_homo_sapiens;version=2"              
## [12] "ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16"  
## [13] "ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10"     
## [14] "ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13"     
## [15] "ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17"  
## [16] "ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6"  
## [17] "ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13"  
## [18] "ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7"    
## [19] "ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8"
# highlight every match of "nicotinic" within the subsetted attributes strings

nicotinic_str %>% str_view_all("nicotinic")

Subset the rows from the data frame where the attributes column contains the word “nicotinic”

# keep every row whose attributes field mentions "nicotinic";
# the match is on free text, so the output also contains pseudogene records.
# `attributes` is referenced as a column of the piped data frame rather than
# through `rhs3898gff3$attribute`, which depended on `$` partial matching and
# bypassed the data that filter() is actually operating on.

nicotinic <- rhs3898gff3 %>%
  filter(str_detect(attributes, "nicotinic"))

nicotinic
##         seqid         source       type     start       end score strand phase
## 171564      1 ensembl_havana       gene 154567778 154580013    NA      +  <NA>
## 390056     11 ensembl_havana       gene   3665587   3671384    NA      -  <NA>
## 868251     15         havana pseudogene  28346490  28346647    NA      +  <NA>
## 868591     15         havana pseudogene  28564986  28565152    NA      -  <NA>
## 871930     15 ensembl_havana       gene  31923438  32173018    NA      +  <NA>
## 941990     15 ensembl_havana       gene  78565520  78595269    NA      +  <NA>
## 942046     15 ensembl_havana       gene  78593052  78621295    NA      -  <NA>
## 942128     15 ensembl_havana       gene  78624111  78727754    NA      -  <NA>
## 1115037    17 ensembl_havana       gene   4897771   4934438    NA      -  <NA>
## 1124543    17 ensembl_havana       gene   7445061   7457710    NA      +  <NA>
## 1323732    18         havana pseudogene  69469517  69471736    NA      +  <NA>
## 1646403     2 ensembl_havana       gene 174747592 174787935    NA      -  <NA>
## 1701797     2 ensembl_havana       gene 232525993 232536667    NA      +  <NA>
## 1701932     2 ensembl_havana       gene 232539692 232548115    NA      +  <NA>
## 1783481    20 ensembl_havana       gene  63343223  63378401    NA      -  <NA>
## 2100056     4 ensembl_havana       gene  40335333  40355217    NA      +  <NA>
## 2619636     8 ensembl_havana       gene  27459756  27479883    NA      -  <NA>
## 2634316     8 ensembl_havana       gene  42697366  42737407    NA      +  <NA>
## 2634355     8 ensembl_havana       gene  42752620  42796392    NA      -  <NA>
##                                                                                                                                                                                                                                          attributes
## 171564      ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056  ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 868251                          ID=gene:ENSG00000261561;Name=AC091304.5;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000261561;logic_name=havana_homo_sapiens;version=1
## 868591                          ID=gene:ENSG00000260444;Name=AC138749.2;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000260444;logic_name=havana_homo_sapiens;version=1
## 871930    ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990    ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046    ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128     ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037    ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543    ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1323732               ID=gene:ENSG00000264845;Name=AC119868.1;biotype=transcribed_processed_pseudogene;description=cholinergic receptor%2C nicotinic%2C epsilon (CHRNE) pseudogene;gene_id=ENSG00000264845;logic_name=havana_homo_sapiens;version=2
## 1646403   ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797      ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932      ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481   ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056   ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636   ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316     ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355   ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
# alternative syntax using grepl and square brackets also yields the same result
# (full column name `attributes`; `$attribute` only worked via partial matching)

rhs3898gff3[grepl(pattern = "nicotinic", rhs3898gff3$attributes), ]
##         seqid         source       type     start       end score strand phase
## 171564      1 ensembl_havana       gene 154567778 154580013    NA      +  <NA>
## 390056     11 ensembl_havana       gene   3665587   3671384    NA      -  <NA>
## 868251     15         havana pseudogene  28346490  28346647    NA      +  <NA>
## 868591     15         havana pseudogene  28564986  28565152    NA      -  <NA>
## 871930     15 ensembl_havana       gene  31923438  32173018    NA      +  <NA>
## 941990     15 ensembl_havana       gene  78565520  78595269    NA      +  <NA>
## 942046     15 ensembl_havana       gene  78593052  78621295    NA      -  <NA>
## 942128     15 ensembl_havana       gene  78624111  78727754    NA      -  <NA>
## 1115037    17 ensembl_havana       gene   4897771   4934438    NA      -  <NA>
## 1124543    17 ensembl_havana       gene   7445061   7457710    NA      +  <NA>
## 1323732    18         havana pseudogene  69469517  69471736    NA      +  <NA>
## 1646403     2 ensembl_havana       gene 174747592 174787935    NA      -  <NA>
## 1701797     2 ensembl_havana       gene 232525993 232536667    NA      +  <NA>
## 1701932     2 ensembl_havana       gene 232539692 232548115    NA      +  <NA>
## 1783481    20 ensembl_havana       gene  63343223  63378401    NA      -  <NA>
## 2100056     4 ensembl_havana       gene  40335333  40355217    NA      +  <NA>
## 2619636     8 ensembl_havana       gene  27459756  27479883    NA      -  <NA>
## 2634316     8 ensembl_havana       gene  42697366  42737407    NA      +  <NA>
## 2634355     8 ensembl_havana       gene  42752620  42796392    NA      -  <NA>
##                                                                                                                                                                                                                                          attributes
## 171564      ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056  ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 868251                          ID=gene:ENSG00000261561;Name=AC091304.5;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000261561;logic_name=havana_homo_sapiens;version=1
## 868591                          ID=gene:ENSG00000260444;Name=AC138749.2;biotype=unprocessed_pseudogene;description=cholinergic receptor%2C nicotinic%2C beta 4 (CHRNB4) pseudogene;gene_id=ENSG00000260444;logic_name=havana_homo_sapiens;version=1
## 871930    ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990    ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046    ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128     ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037    ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543    ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1323732               ID=gene:ENSG00000264845;Name=AC119868.1;biotype=transcribed_processed_pseudogene;description=cholinergic receptor%2C nicotinic%2C epsilon (CHRNE) pseudogene;gene_id=ENSG00000264845;logic_name=havana_homo_sapiens;version=2
## 1646403   ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797      ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932      ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481   ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056   ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636   ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316     ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355   ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8

Subset the rows from the data frame where type is “gene” and the attributes column contains “nicotinic”

# keep rows that are annotated as type "gene" AND whose attributes field
# mentions "nicotinic" (this drops the pseudogene records).
# Both conditions use columns of the piped data frame; the previous
# `rhs3898gff3$attribute` relied on `$` partial matching of `attributes`.

nicotinic_gene <- rhs3898gff3 %>%
  filter(type == "gene" & str_detect(attributes, "nicotinic"))

nicotinic_gene
##         seqid         source type     start       end score strand phase
## 171564      1 ensembl_havana gene 154567778 154580013    NA      +  <NA>
## 390056     11 ensembl_havana gene   3665587   3671384    NA      -  <NA>
## 871930     15 ensembl_havana gene  31923438  32173018    NA      +  <NA>
## 941990     15 ensembl_havana gene  78565520  78595269    NA      +  <NA>
## 942046     15 ensembl_havana gene  78593052  78621295    NA      -  <NA>
## 942128     15 ensembl_havana gene  78624111  78727754    NA      -  <NA>
## 1115037    17 ensembl_havana gene   4897771   4934438    NA      -  <NA>
## 1124543    17 ensembl_havana gene   7445061   7457710    NA      +  <NA>
## 1646403     2 ensembl_havana gene 174747592 174787935    NA      -  <NA>
## 1701797     2 ensembl_havana gene 232525993 232536667    NA      +  <NA>
## 1701932     2 ensembl_havana gene 232539692 232548115    NA      +  <NA>
## 1783481    20 ensembl_havana gene  63343223  63378401    NA      -  <NA>
## 2100056     4 ensembl_havana gene  40335333  40355217    NA      +  <NA>
## 2619636     8 ensembl_havana gene  27459756  27479883    NA      -  <NA>
## 2634316     8 ensembl_havana gene  42697366  42737407    NA      +  <NA>
## 2634355     8 ensembl_havana gene  42752620  42796392    NA      -  <NA>
##                                                                                                                                                                                                                                          attributes
## 171564      ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056  ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 871930    ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990    ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046    ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128     ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037    ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543    ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1646403   ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797      ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932      ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481   ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056   ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636   ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316     ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355   ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
# print only the attributes strings of the nicotinic gene rows
nicotinic_gene %>% pull(attributes)
##  [1] "ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6"    
##  [2] "ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3"
##  [3] "ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18"  
##  [4] "ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13"  
##  [5] "ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16"  
##  [6] "ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12"   
##  [7] "ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10"   
##  [8] "ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11"   
##  [9] "ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16"  
## [10] "ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10"     
## [11] "ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13"     
## [12] "ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17"  
## [13] "ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6"  
## [14] "ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13"  
## [15] "ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7"    
## [16] "ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8"

Split the attributes column to extract additional information

# split each attributes string at ";" ; with simplify = TRUE the pieces are
# returned as a character matrix (one row per gene, here 7 columns)

str_split(nicotinic_gene$attributes, ";", simplify = TRUE)
##       [,1]                      [,2]           [,3]                    
##  [1,] "ID=gene:ENSG00000160716" "Name=CHRNB2"  "biotype=protein_coding"
##  [2,] "ID=gene:ENSG00000129749" "Name=CHRNA10" "biotype=protein_coding"
##  [3,] "ID=gene:ENSG00000175344" "Name=CHRNA7"  "biotype=protein_coding"
##  [4,] "ID=gene:ENSG00000169684" "Name=CHRNA5"  "biotype=protein_coding"
##  [5,] "ID=gene:ENSG00000080644" "Name=CHRNA3"  "biotype=protein_coding"
##  [6,] "ID=gene:ENSG00000117971" "Name=CHRNB4"  "biotype=protein_coding"
##  [7,] "ID=gene:ENSG00000108556" "Name=CHRNE"   "biotype=protein_coding"
##  [8,] "ID=gene:ENSG00000170175" "Name=CHRNB1"  "biotype=protein_coding"
##  [9,] "ID=gene:ENSG00000138435" "Name=CHRNA1"  "biotype=protein_coding"
## [10,] "ID=gene:ENSG00000135902" "Name=CHRND"   "biotype=protein_coding"
## [11,] "ID=gene:ENSG00000196811" "Name=CHRNG"   "biotype=protein_coding"
## [12,] "ID=gene:ENSG00000101204" "Name=CHRNA4"  "biotype=protein_coding"
## [13,] "ID=gene:ENSG00000174343" "Name=CHRNA9"  "biotype=protein_coding"
## [14,] "ID=gene:ENSG00000120903" "Name=CHRNA2"  "biotype=protein_coding"
## [15,] "ID=gene:ENSG00000147432" "Name=CHRNB3"  "biotype=protein_coding"
## [16,] "ID=gene:ENSG00000147434" "Name=CHRNA6"  "biotype=protein_coding"
##       [,4]                                                                                               
##  [1,] "description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]"   
##  [2,] "description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]"
##  [3,] "description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]"  
##  [4,] "description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]"  
##  [5,] "description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]"  
##  [6,] "description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]"   
##  [7,] "description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]"  
##  [8,] "description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]"   
##  [9,] "description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]"  
## [10,] "description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]"    
## [11,] "description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]"    
## [12,] "description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]"  
## [13,] "description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]" 
## [14,] "description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]"  
## [15,] "description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]"   
## [16,] "description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]" 
##       [,5]                      [,6]                                         
##  [1,] "gene_id=ENSG00000160716" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [2,] "gene_id=ENSG00000129749" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [3,] "gene_id=ENSG00000175344" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [4,] "gene_id=ENSG00000169684" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [5,] "gene_id=ENSG00000080644" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [6,] "gene_id=ENSG00000117971" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [7,] "gene_id=ENSG00000108556" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [8,] "gene_id=ENSG00000170175" "logic_name=ensembl_havana_gene_homo_sapiens"
##  [9,] "gene_id=ENSG00000138435" "logic_name=ensembl_havana_gene_homo_sapiens"
## [10,] "gene_id=ENSG00000135902" "logic_name=ensembl_havana_gene_homo_sapiens"
## [11,] "gene_id=ENSG00000196811" "logic_name=ensembl_havana_gene_homo_sapiens"
## [12,] "gene_id=ENSG00000101204" "logic_name=ensembl_havana_gene_homo_sapiens"
## [13,] "gene_id=ENSG00000174343" "logic_name=ensembl_havana_gene_homo_sapiens"
## [14,] "gene_id=ENSG00000120903" "logic_name=ensembl_havana_gene_homo_sapiens"
## [15,] "gene_id=ENSG00000147432" "logic_name=ensembl_havana_gene_homo_sapiens"
## [16,] "gene_id=ENSG00000147434" "logic_name=ensembl_havana_gene_homo_sapiens"
##       [,7]        
##  [1,] "version=6" 
##  [2,] "version=3" 
##  [3,] "version=18"
##  [4,] "version=13"
##  [5,] "version=16"
##  [6,] "version=12"
##  [7,] "version=10"
##  [8,] "version=11"
##  [9,] "version=16"
## [10,] "version=10"
## [11,] "version=13"
## [12,] "version=17"
## [13,] "version=6" 
## [14,] "version=13"
## [15,] "version=7" 
## [16,] "version=8"
# same split without simplify: the result is a list with one character
# vector of "key=value" fields per gene

nic_attr_list <- str_split(nicotinic_gene$attributes, pattern = ";")
nic_attr_list
## [[1]]
## [1] "ID=gene:ENSG00000160716"                                                                       
## [2] "Name=CHRNB2"                                                                                   
## [3] "biotype=protein_coding"                                                                        
## [4] "description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]"
## [5] "gene_id=ENSG00000160716"                                                                       
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                   
## [7] "version=6"                                                                                     
## 
## [[2]]
## [1] "ID=gene:ENSG00000129749"                                                                          
## [2] "Name=CHRNA10"                                                                                     
## [3] "biotype=protein_coding"                                                                           
## [4] "description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]"
## [5] "gene_id=ENSG00000129749"                                                                          
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                      
## [7] "version=3"                                                                                        
## 
## [[3]]
## [1] "ID=gene:ENSG00000175344"                                                                        
## [2] "Name=CHRNA7"                                                                                    
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]"
## [5] "gene_id=ENSG00000175344"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=18"                                                                                     
## 
## [[4]]
## [1] "ID=gene:ENSG00000169684"                                                                        
## [2] "Name=CHRNA5"                                                                                    
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]"
## [5] "gene_id=ENSG00000169684"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=13"                                                                                     
## 
## [[5]]
## [1] "ID=gene:ENSG00000080644"                                                                        
## [2] "Name=CHRNA3"                                                                                    
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]"
## [5] "gene_id=ENSG00000080644"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=16"                                                                                     
## 
## [[6]]
## [1] "ID=gene:ENSG00000117971"                                                                       
## [2] "Name=CHRNB4"                                                                                   
## [3] "biotype=protein_coding"                                                                        
## [4] "description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]"
## [5] "gene_id=ENSG00000117971"                                                                       
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                   
## [7] "version=12"                                                                                    
## 
## [[7]]
## [1] "ID=gene:ENSG00000108556"                                                                        
## [2] "Name=CHRNE"                                                                                     
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]"
## [5] "gene_id=ENSG00000108556"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=10"                                                                                     
## 
## [[8]]
## [1] "ID=gene:ENSG00000170175"                                                                       
## [2] "Name=CHRNB1"                                                                                   
## [3] "biotype=protein_coding"                                                                        
## [4] "description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]"
## [5] "gene_id=ENSG00000170175"                                                                       
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                   
## [7] "version=11"                                                                                    
## 
## [[9]]
## [1] "ID=gene:ENSG00000138435"                                                                        
## [2] "Name=CHRNA1"                                                                                    
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]"
## [5] "gene_id=ENSG00000138435"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=16"                                                                                     
## 
## [[10]]
## [1] "ID=gene:ENSG00000135902"                                                                      
## [2] "Name=CHRND"                                                                                   
## [3] "biotype=protein_coding"                                                                       
## [4] "description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]"
## [5] "gene_id=ENSG00000135902"                                                                      
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                  
## [7] "version=10"                                                                                   
## 
## [[11]]
## [1] "ID=gene:ENSG00000196811"                                                                      
## [2] "Name=CHRNG"                                                                                   
## [3] "biotype=protein_coding"                                                                       
## [4] "description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]"
## [5] "gene_id=ENSG00000196811"                                                                      
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                  
## [7] "version=13"                                                                                   
## 
## [[12]]
## [1] "ID=gene:ENSG00000101204"                                                                        
## [2] "Name=CHRNA4"                                                                                    
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]"
## [5] "gene_id=ENSG00000101204"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=17"                                                                                     
## 
## [[13]]
## [1] "ID=gene:ENSG00000174343"                                                                         
## [2] "Name=CHRNA9"                                                                                     
## [3] "biotype=protein_coding"                                                                          
## [4] "description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]"
## [5] "gene_id=ENSG00000174343"                                                                         
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                     
## [7] "version=6"                                                                                       
## 
## [[14]]
## [1] "ID=gene:ENSG00000120903"                                                                        
## [2] "Name=CHRNA2"                                                                                    
## [3] "biotype=protein_coding"                                                                         
## [4] "description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]"
## [5] "gene_id=ENSG00000120903"                                                                        
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                    
## [7] "version=13"                                                                                     
## 
## [[15]]
## [1] "ID=gene:ENSG00000147432"                                                                       
## [2] "Name=CHRNB3"                                                                                   
## [3] "biotype=protein_coding"                                                                        
## [4] "description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]"
## [5] "gene_id=ENSG00000147432"                                                                       
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                   
## [7] "version=7"                                                                                     
## 
## [[16]]
## [1] "ID=gene:ENSG00000147434"                                                                         
## [2] "Name=CHRNA6"                                                                                     
## [3] "biotype=protein_coding"                                                                          
## [4] "description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]"
## [5] "gene_id=ENSG00000147434"                                                                         
## [6] "logic_name=ensembl_havana_gene_homo_sapiens"                                                     
## [7] "version=8"
# stack the per-gene field vectors row-wise into a data frame and label the
# seven columns with the attribute keys, in the order the fields appear

nic_attr_df <- setNames(
  do.call(rbind.data.frame, nic_attr_list),
  c("ID", "Name", "biotype", "description", "gene_id", "logic_name", "version")
)
nic_attr_df
##                         ID         Name                biotype
## 1  ID=gene:ENSG00000160716  Name=CHRNB2 biotype=protein_coding
## 2  ID=gene:ENSG00000129749 Name=CHRNA10 biotype=protein_coding
## 3  ID=gene:ENSG00000175344  Name=CHRNA7 biotype=protein_coding
## 4  ID=gene:ENSG00000169684  Name=CHRNA5 biotype=protein_coding
## 5  ID=gene:ENSG00000080644  Name=CHRNA3 biotype=protein_coding
## 6  ID=gene:ENSG00000117971  Name=CHRNB4 biotype=protein_coding
## 7  ID=gene:ENSG00000108556   Name=CHRNE biotype=protein_coding
## 8  ID=gene:ENSG00000170175  Name=CHRNB1 biotype=protein_coding
## 9  ID=gene:ENSG00000138435  Name=CHRNA1 biotype=protein_coding
## 10 ID=gene:ENSG00000135902   Name=CHRND biotype=protein_coding
## 11 ID=gene:ENSG00000196811   Name=CHRNG biotype=protein_coding
## 12 ID=gene:ENSG00000101204  Name=CHRNA4 biotype=protein_coding
## 13 ID=gene:ENSG00000174343  Name=CHRNA9 biotype=protein_coding
## 14 ID=gene:ENSG00000120903  Name=CHRNA2 biotype=protein_coding
## 15 ID=gene:ENSG00000147432  Name=CHRNB3 biotype=protein_coding
## 16 ID=gene:ENSG00000147434  Name=CHRNA6 biotype=protein_coding
##                                                                                          description
## 1     description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]
## 2  description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]
## 3    description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]
## 4    description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]
## 5    description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]
## 6     description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]
## 7    description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]
## 8     description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]
## 9    description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]
## 10     description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]
## 11     description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]
## 12   description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]
## 13  description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]
## 14   description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]
## 15    description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]
## 16  description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]
##                    gene_id                                  logic_name
## 1  gene_id=ENSG00000160716 logic_name=ensembl_havana_gene_homo_sapiens
## 2  gene_id=ENSG00000129749 logic_name=ensembl_havana_gene_homo_sapiens
## 3  gene_id=ENSG00000175344 logic_name=ensembl_havana_gene_homo_sapiens
## 4  gene_id=ENSG00000169684 logic_name=ensembl_havana_gene_homo_sapiens
## 5  gene_id=ENSG00000080644 logic_name=ensembl_havana_gene_homo_sapiens
## 6  gene_id=ENSG00000117971 logic_name=ensembl_havana_gene_homo_sapiens
## 7  gene_id=ENSG00000108556 logic_name=ensembl_havana_gene_homo_sapiens
## 8  gene_id=ENSG00000170175 logic_name=ensembl_havana_gene_homo_sapiens
## 9  gene_id=ENSG00000138435 logic_name=ensembl_havana_gene_homo_sapiens
## 10 gene_id=ENSG00000135902 logic_name=ensembl_havana_gene_homo_sapiens
## 11 gene_id=ENSG00000196811 logic_name=ensembl_havana_gene_homo_sapiens
## 12 gene_id=ENSG00000101204 logic_name=ensembl_havana_gene_homo_sapiens
## 13 gene_id=ENSG00000174343 logic_name=ensembl_havana_gene_homo_sapiens
## 14 gene_id=ENSG00000120903 logic_name=ensembl_havana_gene_homo_sapiens
## 15 gene_id=ENSG00000147432 logic_name=ensembl_havana_gene_homo_sapiens
## 16 gene_id=ENSG00000147434 logic_name=ensembl_havana_gene_homo_sapiens
##       version
## 1   version=6
## 2   version=3
## 3  version=18
## 4  version=13
## 5  version=16
## 6  version=12
## 7  version=10
## 8  version=11
## 9  version=16
## 10 version=10
## 11 version=13
## 12 version=17
## 13  version=6
## 14 version=13
## 15  version=7
## 16  version=8
# append the split-out attribute columns to the right of the gene table
# (rows are matched by position, so both data frames must be in the same order)

nicotinic_gene <- nicotinic_gene %>%
  bind_cols(nic_attr_df)

nicotinic_gene
##         seqid         source type     start       end score strand phase
## 171564      1 ensembl_havana gene 154567778 154580013    NA      +  <NA>
## 390056     11 ensembl_havana gene   3665587   3671384    NA      -  <NA>
## 871930     15 ensembl_havana gene  31923438  32173018    NA      +  <NA>
## 941990     15 ensembl_havana gene  78565520  78595269    NA      +  <NA>
## 942046     15 ensembl_havana gene  78593052  78621295    NA      -  <NA>
## 942128     15 ensembl_havana gene  78624111  78727754    NA      -  <NA>
## 1115037    17 ensembl_havana gene   4897771   4934438    NA      -  <NA>
## 1124543    17 ensembl_havana gene   7445061   7457710    NA      +  <NA>
## 1646403     2 ensembl_havana gene 174747592 174787935    NA      -  <NA>
## 1701797     2 ensembl_havana gene 232525993 232536667    NA      +  <NA>
## 1701932     2 ensembl_havana gene 232539692 232548115    NA      +  <NA>
## 1783481    20 ensembl_havana gene  63343223  63378401    NA      -  <NA>
## 2100056     4 ensembl_havana gene  40335333  40355217    NA      +  <NA>
## 2619636     8 ensembl_havana gene  27459756  27479883    NA      -  <NA>
## 2634316     8 ensembl_havana gene  42697366  42737407    NA      +  <NA>
## 2634355     8 ensembl_havana gene  42752620  42796392    NA      -  <NA>
##                                                                                                                                                                                                                                          attributes
## 171564      ID=gene:ENSG00000160716;Name=CHRNB2;biotype=protein_coding;description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962];gene_id=ENSG00000160716;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 390056  ID=gene:ENSG00000129749;Name=CHRNA10;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800];gene_id=ENSG00000129749;logic_name=ensembl_havana_gene_homo_sapiens;version=3
## 871930    ID=gene:ENSG00000175344;Name=CHRNA7;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960];gene_id=ENSG00000175344;logic_name=ensembl_havana_gene_homo_sapiens;version=18
## 941990    ID=gene:ENSG00000169684;Name=CHRNA5;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959];gene_id=ENSG00000169684;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 942046    ID=gene:ENSG00000080644;Name=CHRNA3;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957];gene_id=ENSG00000080644;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 942128     ID=gene:ENSG00000117971;Name=CHRNB4;biotype=protein_coding;description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964];gene_id=ENSG00000117971;logic_name=ensembl_havana_gene_homo_sapiens;version=12
## 1115037    ID=gene:ENSG00000108556;Name=CHRNE;biotype=protein_coding;description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966];gene_id=ENSG00000108556;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1124543    ID=gene:ENSG00000170175;Name=CHRNB1;biotype=protein_coding;description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961];gene_id=ENSG00000170175;logic_name=ensembl_havana_gene_homo_sapiens;version=11
## 1646403   ID=gene:ENSG00000138435;Name=CHRNA1;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955];gene_id=ENSG00000138435;logic_name=ensembl_havana_gene_homo_sapiens;version=16
## 1701797      ID=gene:ENSG00000135902;Name=CHRND;biotype=protein_coding;description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965];gene_id=ENSG00000135902;logic_name=ensembl_havana_gene_homo_sapiens;version=10
## 1701932      ID=gene:ENSG00000196811;Name=CHRNG;biotype=protein_coding;description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967];gene_id=ENSG00000196811;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 1783481   ID=gene:ENSG00000101204;Name=CHRNA4;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958];gene_id=ENSG00000101204;logic_name=ensembl_havana_gene_homo_sapiens;version=17
## 2100056   ID=gene:ENSG00000174343;Name=CHRNA9;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079];gene_id=ENSG00000174343;logic_name=ensembl_havana_gene_homo_sapiens;version=6
## 2619636   ID=gene:ENSG00000120903;Name=CHRNA2;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956];gene_id=ENSG00000120903;logic_name=ensembl_havana_gene_homo_sapiens;version=13
## 2634316     ID=gene:ENSG00000147432;Name=CHRNB3;biotype=protein_coding;description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963];gene_id=ENSG00000147432;logic_name=ensembl_havana_gene_homo_sapiens;version=7
## 2634355   ID=gene:ENSG00000147434;Name=CHRNA6;biotype=protein_coding;description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963];gene_id=ENSG00000147434;logic_name=ensembl_havana_gene_homo_sapiens;version=8
##                              ID         Name                biotype
## 171564  ID=gene:ENSG00000160716  Name=CHRNB2 biotype=protein_coding
## 390056  ID=gene:ENSG00000129749 Name=CHRNA10 biotype=protein_coding
## 871930  ID=gene:ENSG00000175344  Name=CHRNA7 biotype=protein_coding
## 941990  ID=gene:ENSG00000169684  Name=CHRNA5 biotype=protein_coding
## 942046  ID=gene:ENSG00000080644  Name=CHRNA3 biotype=protein_coding
## 942128  ID=gene:ENSG00000117971  Name=CHRNB4 biotype=protein_coding
## 1115037 ID=gene:ENSG00000108556   Name=CHRNE biotype=protein_coding
## 1124543 ID=gene:ENSG00000170175  Name=CHRNB1 biotype=protein_coding
## 1646403 ID=gene:ENSG00000138435  Name=CHRNA1 biotype=protein_coding
## 1701797 ID=gene:ENSG00000135902   Name=CHRND biotype=protein_coding
## 1701932 ID=gene:ENSG00000196811   Name=CHRNG biotype=protein_coding
## 1783481 ID=gene:ENSG00000101204  Name=CHRNA4 biotype=protein_coding
## 2100056 ID=gene:ENSG00000174343  Name=CHRNA9 biotype=protein_coding
## 2619636 ID=gene:ENSG00000120903  Name=CHRNA2 biotype=protein_coding
## 2634316 ID=gene:ENSG00000147432  Name=CHRNB3 biotype=protein_coding
## 2634355 ID=gene:ENSG00000147434  Name=CHRNA6 biotype=protein_coding
##                                                                                               description
## 171564     description=cholinergic receptor nicotinic beta 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1962]
## 390056  description=cholinergic receptor nicotinic alpha 10 subunit [Source:HGNC Symbol%3BAcc:HGNC:13800]
## 871930    description=cholinergic receptor nicotinic alpha 7 subunit [Source:HGNC Symbol%3BAcc:HGNC:1960]
## 941990    description=cholinergic receptor nicotinic alpha 5 subunit [Source:HGNC Symbol%3BAcc:HGNC:1959]
## 942046    description=cholinergic receptor nicotinic alpha 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1957]
## 942128     description=cholinergic receptor nicotinic beta 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1964]
## 1115037   description=cholinergic receptor nicotinic epsilon subunit [Source:HGNC Symbol%3BAcc:HGNC:1966]
## 1124543    description=cholinergic receptor nicotinic beta 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1961]
## 1646403   description=cholinergic receptor nicotinic alpha 1 subunit [Source:HGNC Symbol%3BAcc:HGNC:1955]
## 1701797     description=cholinergic receptor nicotinic delta subunit [Source:HGNC Symbol%3BAcc:HGNC:1965]
## 1701932     description=cholinergic receptor nicotinic gamma subunit [Source:HGNC Symbol%3BAcc:HGNC:1967]
## 1783481   description=cholinergic receptor nicotinic alpha 4 subunit [Source:HGNC Symbol%3BAcc:HGNC:1958]
## 2100056  description=cholinergic receptor nicotinic alpha 9 subunit [Source:HGNC Symbol%3BAcc:HGNC:14079]
## 2619636   description=cholinergic receptor nicotinic alpha 2 subunit [Source:HGNC Symbol%3BAcc:HGNC:1956]
## 2634316    description=cholinergic receptor nicotinic beta 3 subunit [Source:HGNC Symbol%3BAcc:HGNC:1963]
## 2634355  description=cholinergic receptor nicotinic alpha 6 subunit [Source:HGNC Symbol%3BAcc:HGNC:15963]
##                         gene_id                                  logic_name
## 171564  gene_id=ENSG00000160716 logic_name=ensembl_havana_gene_homo_sapiens
## 390056  gene_id=ENSG00000129749 logic_name=ensembl_havana_gene_homo_sapiens
## 871930  gene_id=ENSG00000175344 logic_name=ensembl_havana_gene_homo_sapiens
## 941990  gene_id=ENSG00000169684 logic_name=ensembl_havana_gene_homo_sapiens
## 942046  gene_id=ENSG00000080644 logic_name=ensembl_havana_gene_homo_sapiens
## 942128  gene_id=ENSG00000117971 logic_name=ensembl_havana_gene_homo_sapiens
## 1115037 gene_id=ENSG00000108556 logic_name=ensembl_havana_gene_homo_sapiens
## 1124543 gene_id=ENSG00000170175 logic_name=ensembl_havana_gene_homo_sapiens
## 1646403 gene_id=ENSG00000138435 logic_name=ensembl_havana_gene_homo_sapiens
## 1701797 gene_id=ENSG00000135902 logic_name=ensembl_havana_gene_homo_sapiens
## 1701932 gene_id=ENSG00000196811 logic_name=ensembl_havana_gene_homo_sapiens
## 1783481 gene_id=ENSG00000101204 logic_name=ensembl_havana_gene_homo_sapiens
## 2100056 gene_id=ENSG00000174343 logic_name=ensembl_havana_gene_homo_sapiens
## 2619636 gene_id=ENSG00000120903 logic_name=ensembl_havana_gene_homo_sapiens
## 2634316 gene_id=ENSG00000147432 logic_name=ensembl_havana_gene_homo_sapiens
## 2634355 gene_id=ENSG00000147434 logic_name=ensembl_havana_gene_homo_sapiens
##            version
## 171564   version=6
## 390056   version=3
## 871930  version=18
## 941990  version=13
## 942046  version=16
## 942128  version=12
## 1115037 version=10
## 1124543 version=11
## 1646403 version=16
## 1701797 version=10
## 1701932 version=13
## 1783481 version=17
## 2100056  version=6
## 2619636 version=13
## 2634316  version=7
## 2634355  version=8

Precise information can be obtained from the newly created columns

# extract name: the gene symbol is everything after the "Name=" key prefix.
# Stripping the prefix (instead of matching a run of five capitals) also works
# for symbols with digits, lowercase letters or hyphens near the start,
# e.g. "C1orf112", which the capital-letter pattern would turn into NA.
nicotinic_gene$NAME <- str_remove(nicotinic_gene$Name, pattern = "^Name=")
# extract gene id: the accession beginning at "ENSG"
nicotinic_gene$GENE_ID <- str_extract(
  nicotinic_gene$gene_id,
  pattern = "ENSG.*"
)
# extract descriptive info: the text from "cholinergic" through the last
# "subunit", which leaves out the trailing "[Source:...]" annotation
nicotinic_gene$DESCRIPTION <- str_extract(
  nicotinic_gene$description,
  pattern = "cholinergic.*subunit"
)
colnames(nicotinic_gene)
##  [1] "seqid"       "source"      "type"        "start"       "end"        
##  [6] "score"       "strand"      "phase"       "attributes"  "ID"         
## [11] "Name"        "biotype"     "description" "gene_id"     "logic_name" 
## [16] "version"     "NAME"        "GENE_ID"     "DESCRIPTION"

Drop the index column or row names

# discard the row labels carried over from the full GFF table so that rows are
# numbered 1..n again
rownames(nicotinic_gene) <- NULL

Select or keep the relevant columns

# keep the genomic coordinates plus the three cleaned-up attribute columns
chrn_df <- select(
  nicotinic_gene,
  seqid, source, type, start, end, strand, NAME, GENE_ID, DESCRIPTION
)
chrn_df
##    seqid         source type     start       end strand    NAME         GENE_ID
## 1      1 ensembl_havana gene 154567778 154580013      +  CHRNB2 ENSG00000160716
## 2     11 ensembl_havana gene   3665587   3671384      - CHRNA10 ENSG00000129749
## 3     15 ensembl_havana gene  31923438  32173018      +  CHRNA7 ENSG00000175344
## 4     15 ensembl_havana gene  78565520  78595269      +  CHRNA5 ENSG00000169684
## 5     15 ensembl_havana gene  78593052  78621295      -  CHRNA3 ENSG00000080644
## 6     15 ensembl_havana gene  78624111  78727754      -  CHRNB4 ENSG00000117971
## 7     17 ensembl_havana gene   4897771   4934438      -   CHRNE ENSG00000108556
## 8     17 ensembl_havana gene   7445061   7457710      +  CHRNB1 ENSG00000170175
## 9      2 ensembl_havana gene 174747592 174787935      -  CHRNA1 ENSG00000138435
## 10     2 ensembl_havana gene 232525993 232536667      +   CHRND ENSG00000135902
## 11     2 ensembl_havana gene 232539692 232548115      +   CHRNG ENSG00000196811
## 12    20 ensembl_havana gene  63343223  63378401      -  CHRNA4 ENSG00000101204
## 13     4 ensembl_havana gene  40335333  40355217      +  CHRNA9 ENSG00000174343
## 14     8 ensembl_havana gene  27459756  27479883      -  CHRNA2 ENSG00000120903
## 15     8 ensembl_havana gene  42697366  42737407      +  CHRNB3 ENSG00000147432
## 16     8 ensembl_havana gene  42752620  42796392      -  CHRNA6 ENSG00000147434
##                                        DESCRIPTION
## 1    cholinergic receptor nicotinic beta 2 subunit
## 2  cholinergic receptor nicotinic alpha 10 subunit
## 3   cholinergic receptor nicotinic alpha 7 subunit
## 4   cholinergic receptor nicotinic alpha 5 subunit
## 5   cholinergic receptor nicotinic alpha 3 subunit
## 6    cholinergic receptor nicotinic beta 4 subunit
## 7   cholinergic receptor nicotinic epsilon subunit
## 8    cholinergic receptor nicotinic beta 1 subunit
## 9   cholinergic receptor nicotinic alpha 1 subunit
## 10    cholinergic receptor nicotinic delta subunit
## 11    cholinergic receptor nicotinic gamma subunit
## 12  cholinergic receptor nicotinic alpha 4 subunit
## 13  cholinergic receptor nicotinic alpha 9 subunit
## 14  cholinergic receptor nicotinic alpha 2 subunit
## 15   cholinergic receptor nicotinic beta 3 subunit
## 16  cholinergic receptor nicotinic alpha 6 subunit

Write out (save) the data frame as a CSV file for later use

# writes to "chrn_df.csv", resolved against the current working directory
write_csv(chrn_df, file = "chrn_df.csv")

Derive a length column for each gene to see their distribution

# gene length in base pairs (bp), i.e. the number of bases/nucleotides;
# the + 1 counts both the start and the end position of each gene

chrn_df[["GENE_LENGTH (bp)"]] <- chrn_df$end - chrn_df$start + 1
chrn_df
##    seqid         source type     start       end strand    NAME         GENE_ID
## 1      1 ensembl_havana gene 154567778 154580013      +  CHRNB2 ENSG00000160716
## 2     11 ensembl_havana gene   3665587   3671384      - CHRNA10 ENSG00000129749
## 3     15 ensembl_havana gene  31923438  32173018      +  CHRNA7 ENSG00000175344
## 4     15 ensembl_havana gene  78565520  78595269      +  CHRNA5 ENSG00000169684
## 5     15 ensembl_havana gene  78593052  78621295      -  CHRNA3 ENSG00000080644
## 6     15 ensembl_havana gene  78624111  78727754      -  CHRNB4 ENSG00000117971
## 7     17 ensembl_havana gene   4897771   4934438      -   CHRNE ENSG00000108556
## 8     17 ensembl_havana gene   7445061   7457710      +  CHRNB1 ENSG00000170175
## 9      2 ensembl_havana gene 174747592 174787935      -  CHRNA1 ENSG00000138435
## 10     2 ensembl_havana gene 232525993 232536667      +   CHRND ENSG00000135902
## 11     2 ensembl_havana gene 232539692 232548115      +   CHRNG ENSG00000196811
## 12    20 ensembl_havana gene  63343223  63378401      -  CHRNA4 ENSG00000101204
## 13     4 ensembl_havana gene  40335333  40355217      +  CHRNA9 ENSG00000174343
## 14     8 ensembl_havana gene  27459756  27479883      -  CHRNA2 ENSG00000120903
## 15     8 ensembl_havana gene  42697366  42737407      +  CHRNB3 ENSG00000147432
## 16     8 ensembl_havana gene  42752620  42796392      -  CHRNA6 ENSG00000147434
##                                        DESCRIPTION GENE_LENGTH (bp)
## 1    cholinergic receptor nicotinic beta 2 subunit            12236
## 2  cholinergic receptor nicotinic alpha 10 subunit             5798
## 3   cholinergic receptor nicotinic alpha 7 subunit           249581
## 4   cholinergic receptor nicotinic alpha 5 subunit            29750
## 5   cholinergic receptor nicotinic alpha 3 subunit            28244
## 6    cholinergic receptor nicotinic beta 4 subunit           103644
## 7   cholinergic receptor nicotinic epsilon subunit            36668
## 8    cholinergic receptor nicotinic beta 1 subunit            12650
## 9   cholinergic receptor nicotinic alpha 1 subunit            40344
## 10    cholinergic receptor nicotinic delta subunit            10675
## 11    cholinergic receptor nicotinic gamma subunit             8424
## 12  cholinergic receptor nicotinic alpha 4 subunit            35179
## 13  cholinergic receptor nicotinic alpha 9 subunit            19885
## 14  cholinergic receptor nicotinic alpha 2 subunit            20128
## 15   cholinergic receptor nicotinic beta 3 subunit            40042
## 16  cholinergic receptor nicotinic alpha 6 subunit            43773
# Longest human CHRN gene: keep the row(s) whose length equals the maximum

with(chrn_df, chrn_df[`GENE_LENGTH (bp)` == max(`GENE_LENGTH (bp)`), ])
##   seqid         source type    start      end strand   NAME         GENE_ID
## 3    15 ensembl_havana gene 31923438 32173018      + CHRNA7 ENSG00000175344
##                                      DESCRIPTION GENE_LENGTH (bp)
## 3 cholinergic receptor nicotinic alpha 7 subunit           249581
# Shortest human CHRN gene: keep the row(s) whose length equals the minimum

with(chrn_df, chrn_df[`GENE_LENGTH (bp)` == min(`GENE_LENGTH (bp)`), ])
##   seqid         source type   start     end strand    NAME         GENE_ID
## 2    11 ensembl_havana gene 3665587 3671384      - CHRNA10 ENSG00000129749
##                                       DESCRIPTION GENE_LENGTH (bp)
## 2 cholinergic receptor nicotinic alpha 10 subunit             5798

Some plots of the gene length variable

# histogram in ggplot
# The lengths span roughly 6 kb to 250 kb, so 10 kb bins are used: with much
# narrower bins nearly every one of the 16 genes lands in a bin of its own
# and the plot no longer shows a distribution.

ggplot(data = chrn_df, mapping = aes(x = `GENE_LENGTH (bp)`)) +
  geom_histogram(binwidth = 10000)

# density curve of the gene lengths, drawn in red

ggplot(chrn_df) +
  geom_density(aes(x = `GENE_LENGTH (bp)`), colour = "red")

# box plot of the gene lengths (mapped to the x axis), outlined in red

ggplot(chrn_df, aes(x = `GENE_LENGTH (bp)`)) +
  geom_boxplot(colour = "red")

# sorted barplot in baseR

# order the genes from shortest to longest
chrn_sorted <- chrn_df[order(chrn_df$`GENE_LENGTH (bp)`), ]

# las = 2 draws the axis labels perpendicular to their axes; with 16 gene
# names written horizontally, neighbouring labels collide and base graphics
# silently leaves some of them out
barplot(
  chrn_sorted$`GENE_LENGTH (bp)`,
  names.arg = chrn_sorted$NAME,
  las = 2
)

# font = 4 requests a bold italic title
title(main = list("Length of human CHRN genes (bp)", font = 4))

# bar chart in ggplot: one column per gene, column height = gene length;
# the gene names on the x axis are rotated by 90 degrees

ggplot(chrn_df, aes(NAME, `GENE_LENGTH (bp)`)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Specific pieces of information can be extracted from the attributes column with regular expressions, without splitting that column

# Ensembl gene ID: the first "ENSG" followed by a run of digits
str_extract(nicotinic_gene$attributes, "ENSG[0-9]+")
##  [1] "ENSG00000160716" "ENSG00000129749" "ENSG00000175344" "ENSG00000169684"
##  [5] "ENSG00000080644" "ENSG00000117971" "ENSG00000108556" "ENSG00000170175"
##  [9] "ENSG00000138435" "ENSG00000135902" "ENSG00000196811" "ENSG00000101204"
## [13] "ENSG00000174343" "ENSG00000120903" "ENSG00000147432" "ENSG00000147434"
# Gene symbol: the text after "Name=" up to (not including) the next ";".
# Keying on the tag, rather than on "five capitals, then capitals/digits",
# keeps this working for symbols that are shorter or contain other characters.
# NOTE(review): assumes every attributes string carries a Name=<symbol> field.
nicotinic_gene$attributes %>% str_extract(pattern = "(?<=Name=)[^;]+")
##  [1] "CHRNB2"  "CHRNA10" "CHRNA7"  "CHRNA5"  "CHRNA3"  "CHRNB4"  "CHRNE"  
##  [8] "CHRNB1"  "CHRNA1"  "CHRND"   "CHRNG"   "CHRNA4"  "CHRNA9"  "CHRNA2" 
## [15] "CHRNB3"  "CHRNA6"
# Gene description: from the first "cholinergic" through the last "subunit"
str_extract(nicotinic_gene$attributes, "cholinergic.*subunit")
##  [1] "cholinergic receptor nicotinic beta 2 subunit"  
##  [2] "cholinergic receptor nicotinic alpha 10 subunit"
##  [3] "cholinergic receptor nicotinic alpha 7 subunit" 
##  [4] "cholinergic receptor nicotinic alpha 5 subunit" 
##  [5] "cholinergic receptor nicotinic alpha 3 subunit" 
##  [6] "cholinergic receptor nicotinic beta 4 subunit"  
##  [7] "cholinergic receptor nicotinic epsilon subunit" 
##  [8] "cholinergic receptor nicotinic beta 1 subunit"  
##  [9] "cholinergic receptor nicotinic alpha 1 subunit" 
## [10] "cholinergic receptor nicotinic delta subunit"   
## [11] "cholinergic receptor nicotinic gamma subunit"   
## [12] "cholinergic receptor nicotinic alpha 4 subunit" 
## [13] "cholinergic receptor nicotinic alpha 9 subunit" 
## [14] "cholinergic receptor nicotinic alpha 2 subunit" 
## [15] "cholinergic receptor nicotinic beta 3 subunit"  
## [16] "cholinergic receptor nicotinic alpha 6 subunit"