title: "Annotate and analyze nucleotide and amino acid sequences using Biopython"
author: "Bhagirathi Dash"
date: "December 1, 2018"
output: html_document
import sys
# Build the interpreter banner first, then emit it on standard output.
python_banner = "Hello from Python %s\n" % (sys.version,)
sys.stdout.write(python_banner)
Hello from Python 3.5.6 |Anaconda, Inc.| (default, Aug 26 2018, 16:05:27) [MSC v.1900 64 bit (AMD64)]

Biopython Contents

Affy, Align, AlignIO, Alphabet, Application, Blast,
CAPS, Cluster, Compass, Crystal,
Data,
Emboss, Entrez, ExPASy,
FSSP,
GA, GenBank, Geo, Graphics,
HMM,
KDTree, KEGG,
Medline, NMR,
NeuralNetwork, Nexus,
PDB, Pathway, Phylo, PopGen,
Restriction,
SCOP, SVDSuperimposer, SearchIO, SeqIO, SeqUtils, Sequencing, Statistics, SubsMat, SwissProt,
TogoWS,
UniGene, UniProt,
Wise

import Bio
print(Bio.__version__)
1.72
# Show the built-in documentation for the Biopython subpackages of interest.
# The dotted names are passed as strings so that help() locates and imports
# each subpackage itself: a bare `import Bio` does not necessarily load
# subpackages, in which case attribute access such as `Bio.Align` raises
# AttributeError.
help("Bio.Align")
help("Bio.AlignIO")
help("Bio.Alphabet")
help("Bio.Blast")
help("Bio.Entrez")
help("Bio.GenBank")
help("Bio.Seq")
help("Bio.SeqIO")
help("Bio.SeqRecord")
help("Bio.Sequencing")
help("Bio.SwissProt")

Blast: Applications, NCBIStandalone, NCBIWWW, NCBIXML, ParseBlastTable, Record

from Bio import Blast
help(Blast)
Help on package Bio.Blast in Bio:

NAME
    Bio.Blast - Code for dealing with BLAST programs and output.

PACKAGE CONTENTS
    Applications
    NCBIStandalone
    NCBIWWW
    NCBIXML
    ParseBlastTable
    Record

FILE
    c:\users\bhagi\anaconda3\lib\site-packages\bio\blast\__init__.py
from Bio.Blast import NCBIWWW
help(NCBIWWW.qblast)
Help on function qblast in module Bio.Blast.NCBIWWW:

qblast(program, database, sequence, url_base='https://blast.ncbi.nlm.nih.gov/Blast.cgi', auto_format=None, composition_based_statistics=None, db_genetic_code=None, endpoints=None, entrez_query='(none)', expect=10.0, filter=None, gapcosts=None, genetic_code=None, hitlist_size=50, i_thresh=None, layout=None, lcase_mask=None, matrix_name=None, nucl_penalty=None, nucl_reward=None, other_advanced=None, perc_ident=None, phi_pattern=None, query_file=None, query_believe_defline=None, query_from=None, query_to=None, searchsp_eff=None, service=None, threshold=None, ungapped_alignment=None, word_size=None, alignments=500, alignment_view=None, descriptions=500, entrez_links_new_window=None, expect_low=None, expect_high=None, format_entrez_query=None, format_object=None, format_type='XML', ncbi_gi=None, results_file=None, show_overview=None, megablast=None, template_type=None, template_length=None)
    BLAST search using NCBI's QBLAST server or a cloud service provider.
    
    Supports all parameters of the qblast API for Put and Get.
    
    Please note that BLAST on the cloud supports the NCBI-BLAST Common
    URL API (http://ncbi.github.io/blast-cloud/dev/api.html). To
    use this feature, please set url_base to
    'http://host.my.cloud.service.provider.com/cgi-bin/blast.cgi' and
    format_object='Alignment'. For more details, please see
    https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=CloudBlast
    
    Some useful parameters:
    
     - program        blastn, blastp, blastx, tblastn, or tblastx (lower case)
     - database       Which database to search against (e.g. "nr").
     - sequence       The sequence to search.
     - ncbi_gi        TRUE/FALSE whether to give 'gi' identifier.
     - descriptions   Number of descriptions to show.  Def 500.
     - alignments     Number of alignments to show.  Def 500.
     - expect         An expect value cutoff.  Def 10.0.
     - matrix_name    Specify an alt. matrix (PAM30, PAM70, BLOSUM80, BLOSUM45).
     - filter         "none" turns off filtering.  Default no filtering
     - format_type    "HTML", "Text", "ASN.1", or "XML".  Def. "XML".
     - entrez_query   Entrez query to limit Blast search
     - hitlist_size   Number of hits to return. Default 50
     - megablast      TRUE/FALSE whether to use MEga BLAST algorithm (blastn only)
     - service        plain, psi, phi, rpsblast, megablast (lower case)
    
    This function does no checking of the validity of the parameters
    and passes the values to the server as is.  More help is available at:
    https://ncbi.github.io/blast-cloud/dev/api.html

Blast an unknown sequence

# Read the whole FASTA file (header line included) into one string.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until garbage collection.
with open('seq1.fasta') as fasta_file:
    seq1 = fasta_file.read()
seq1
'>seq1\nCATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC\n\n\n\n'
# Submit the FASTA text to NCBI's QBLAST server (network call): program
# "blastn" against the "nt" database. All other qblast parameters keep
# their defaults, e.g. format_type='XML' and hitlist_size=50.
seq1_handle = NCBIWWW.qblast("blastn", "nt", seq1)
from Bio.Blast import NCBIXML
# Parse the XML returned by qblast into a BLAST record object.
seq1_blast_record = NCBIXML.read(seq1_handle)
# Number of alignments (hits) held by the parsed record.
len(seq1_blast_record.alignments)
50
# Print every HSP of every alignment whose expect value is below the cutoff.
E_value_thresh = 0.01
for alignment in seq1_blast_record.alignments:
    for hsp in alignment.hsps:
        # Guard clause: ignore pairs that do not beat the E-value cutoff.
        if not (hsp.expect < E_value_thresh):
            continue
        print('*****alignment*****')
        print('sequence:', alignment.title)
        print('length:', alignment.length)
        print('e-value:', hsp.expect)
        # Query line, match line and subject line, one per output line.
        print(hsp.query, hsp.match, hsp.sbjct, sep='\n')
*****alignment*****
sequence: gi|1500174811|gb|MK044561.1| Zaire ebolavirus isolate ZEBOV/Human/DRC/2014/BOE_036, partial genome
length: 18898
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1500174801|gb|MK044560.1| Zaire ebolavirus isolate ZEBOV/Human/DRC/2014/BOE_023, partial genome
length: 18870
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1500174791|gb|MK044559.1| Zaire ebolavirus isolate ZEBOV/Human/DRC/2014/BOE_011, partial genome
length: 18896
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1500174781|gb|MK044558.1| Zaire ebolavirus isolate ZEBOV/Human/DRC/2014/BOE-070, partial genome
length: 18784
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1487704041|gb|MH898466.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK012, partial genome
length: 18887
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301462|gb|MH733491.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK137, complete genome
length: 18939
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301452|gb|MH733490.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK091, complete genome
length: 18932
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301442|gb|MH733489.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK055, complete genome
length: 18913
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301422|gb|MH733487.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK046, complete genome
length: 18919
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301412|gb|MH733486.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK045, complete genome
length: 18914
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301402|gb|MH733485.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK039, complete genome
length: 18940
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301392|gb|MH733484.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK037, complete genome
length: 18905
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301382|gb|MH733483.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK031, complete genome
length: 18944
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301372|gb|MH733482.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK026, complete genome
length: 18944
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301362|gb|MH733481.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK025, complete genome
length: 18939
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301352|gb|MH733480.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK019, complete genome
length: 18938
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301342|gb|MH733479.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK016, complete genome
length: 18944
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301332|gb|MH733478.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK011, complete genome
length: 18941
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1453301322|gb|MH733477.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-wt/COD/2018/Tumba-BIK009, complete genome
length: 18894
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1436008541|gb|MH613311.1| Zaire ebolavirus isolate H.sapiens-wt/COD/2017/Muyembe.1, complete genome
length: 18899
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1431829175|gb|MH464889.1| Vector pEBO_BDBV-GP, complete sequence
length: 21172
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1431829165|gb|MH464888.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-rec/COD/1976/Yambuku-Mayinga-eGFP-BDBV_GP, complete genome
length: 18962
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1425140432|gb|MH481611.2| Zaire ebolavirus strain Ebola_virus/DRC/Likati/Human/2017/1, complete genome
length: 18898
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1390208419|gb|MH121168.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/Zaire/1976/Mayinga-R3816, complete genome
length: 18958
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1390208399|gb|MH121166.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1976/Yambuku-Mayinga, complete genome
length: 18957
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1390208389|gb|MH121165.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621, complete genome
length: 18959
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1390208379|gb|MH121164.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621, complete genome
length: 18958
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866795|gb|KY425656.1| Zaire ebolavirus isolate IRF0164, partial genome
length: 18899
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866765|gb|KY425653.1| Zaire ebolavirus isolate IRF0120, partial genome
length: 18894
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866755|gb|KY425652.1| Zaire ebolavirus isolate IRF0150, partial genome
length: 18890
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866739|gb|KY425649.1| Zaire ebolavirus isolate IRF0206, partial genome
length: 18907
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866719|gb|KY425647.1| Zaire ebolavirus isolate IRF0147, partial genome
length: 18902
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866668|gb|KY425639.1| Zaire ebolavirus isolate IRF0175, partial genome
length: 18880
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866655|gb|KY425637.1| Zaire ebolavirus isolate IRF0176, partial genome
length: 18911
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866645|gb|KY425636.1| Zaire ebolavirus isolate IRF0190, partial genome
length: 18909
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|1129866607|gb|KY425630.1| Zaire ebolavirus isolate IRF0156, partial genome
length: 18917
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427286|gb|KU321204.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-0776D9 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427283|gb|KU321203.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-1305D7 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427280|gb|KU321202.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-1302D9 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427277|gb|KU321201.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2807D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 16787
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427274|gb|KU321200.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2571D7 membrane-associated protein VP24 (VP24) and polymerase (L) genes, complete cds
length: 18400
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427271|gb|KU321199.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2709D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427268|gb|KU321198.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2483D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427265|gb|KU321197.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2704D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427262|gb|KU321196.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-1531D9 membrane-associated protein VP24 (VP24) and polymerase (L) genes, partial cds
length: 7699
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427259|gb|KU321195.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-1282D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 7933
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427256|gb|KU321194.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-1509D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8127
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427253|gb|KU321193.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2807D9 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 16060
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427250|gb|KU321192.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2780D3 membrane-associated protein VP24 (VP24) and polymerase (L) genes, complete cds
length: 15430
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
*****alignment*****
sequence: gi|973427247|gb|KU321191.1| Zaire ebolavirus isolate Ebola virus/H.sapiens-tc/COD/1995/Kikwit-9510621-2711D5 membrane-associated protein VP24 (VP24) gene, partial cds; and polymerase (L) gene, complete cds
length: 8007
e-value: 5.89602e-29
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
CATGCTACGGTGCTAAAAGCATTACGCCCTATAGTGATTTTCGAGACATACTGTGTTTTTAAATATAGTATTGCC

Entrez: efetch, esearch, einfo, esummary, egquery, etc.

from Bio import Entrez
# Set the contact e-mail on the Entrez module before issuing any request.
Entrez.email = 'bhagirathi.dsh@gmail.com'
# EInfo with no arguments; the parsed result holds the database names under 'DbList'.
# The handle is named so it can be closed once it has been parsed.
einfo_handle = Entrez.einfo()
print(Entrez.read(einfo_handle))
einfo_handle.close()
DictElement({'DbList': ['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'nucgss', 'nucest', 'structure', 'sparcle', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'unigene', 'gencoll', 'gtr']}, attributes={})
# Same EInfo query, step by step: open the handle, parse it, then release it.
einfo_handle = Entrez.einfo()
einfo_record = Entrez.read(einfo_handle)
einfo_handle.close()  # parsing is finished, so the connection is no longer needed
print(einfo_record)
DictElement({'DbList': ['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'nucgss', 'nucest', 'structure', 'sparcle', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'unigene', 'gencoll', 'gtr']}, attributes={})
# Database names only: the 'DbList' entry of the parsed EInfo result.
einfo_record['DbList']
['pubmed', 'protein', 'nuccore', 'ipg', 'nucleotide', 'nucgss', 'nucest', 'structure', 'sparcle', 'genome', 'annotinfo', 'assembly', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'seqannot', 'snp', 'sra', 'taxonomy', 'biocollections', 'unigene', 'gencoll', 'gtr']

Entrez.efetch(), rettype="gb"

# Fetch NM_000744.6 from the nucleotide database as a plain-text GenBank record.
efetch_handle = Entrez.efetch(db="nucleotide", id="NM_000744.6", rettype="gb", retmode="text")
print(efetch_handle.read())
efetch_handle.close()  # the whole record has been read; release the handle
LOCUS       NM_000744               5543 bp    mRNA    linear   PRI 21-OCT-2018
DEFINITION  Homo sapiens cholinergic receptor nicotinic alpha 4 subunit
            (CHRNA4), transcript variant 1, mRNA.
ACCESSION   NM_000744
VERSION     NM_000744.6
KEYWORDS    RefSeq.
SOURCE      Homo sapiens (human)
  ORGANISM  Homo sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; Homo.
REFERENCE   1  (bases 1 to 5543)
  AUTHORS   Walsh RM Jr, Roh SH, Gharpure A, Morales-Perez CL, Teng J and Hibbs
            RE.
  TITLE     Structural principles of distinct assemblies of the human
            alpha4beta2 nicotinic receptor
  JOURNAL   Nature 557 (7704), 261-265 (2018)
   PUBMED   29720657
  REMARK    GeneRIF: cryoelectron microscopy and the structural principles of
            distinct assemblies of the human alpha4beta2 nicotinic receptor
REFERENCE   2  (bases 1 to 5543)
  AUTHORS   Jeong JE, Rhee JK, Kim TM, Kwak SM, Bang SH, Cho H, Cheon YH, Min
            JA, Yoo GS, Kim K, Choi JS, Choi SW and Kim DJ.
  TITLE     The association between the nicotinic acetylcholine receptor alpha4
            subunit gene (CHRNA4) rs1044396 and Internet gaming disorder in
            Korean male adults
  JOURNAL   PLoS ONE 12 (12), e0188358 (2017)
   PUBMED   29240768
  REMARK    GeneRIF: This study demonstrated that rs1044396 of CHRNA4 was
            significantly associated with Internet gaming disorder
            Publication Status: Online-Only
REFERENCE   3  (bases 1 to 5543)
  AUTHORS   Sadaghiani S, Ng B, Altmann A, Poline JB, Banaschewski T, Bokde
            ALW, Bromberg U, Buchel C, Burke Quinlan E, Conrod P, Desrivieres
            S, Flor H, Frouin V, Garavan H, Gowland P, Gallinat J, Heinz A,
            Ittermann B, Martinot JL, Paillere Martinot ML, Lemaitre H, Nees F,
            Papadopoulos Orfanos D, Paus T, Poustka L, Millenet S, Frohner JH,
            Smolka MN, Walter H, Whelan R, Schumann G, Napolioni V and Greicius
            M.
  TITLE     Overdominant Effect of a CHRNA4 Polymorphism on Cingulo-Opercular
            Network Activity and Cognitive Control
  JOURNAL   J. Neurosci. 37 (40), 9657-9666 (2017)
   PUBMED   28877969
  REMARK    GeneRIF: A polymorphism in CHRNA4 leads to an overdominant effect
            at this locus, with higher activity in the cingulo-opercular
            network underlying alertness.
REFERENCE   4  (bases 1 to 5543)
  AUTHORS   Quan J, Ong ML, Bureau JF, Sim LW, Sanmugam S, Abdul Malik AB, Wong
            E, Wong J, Chong YS, Saw SM, Kwek K, Qiu A, Holbrook JD and
            Rifkin-Graboi A.
  CONSRTM   GUSTO Study Group
  TITLE     The influence of CHRNA4, COMT, and maternal sensitivity on
            orienting and executive attention in 6-month-old infants
  JOURNAL   Brain Cogn 116, 17-28 (2017)
   PUBMED   28582665
  REMARK    GeneRIF: Orienting attention predicted by CHRNA4 and maternal
            sensitivity interaction in 6-month-old infants.
REFERENCE   5  (bases 1 to 5543)
  AUTHORS   Eggert M, Aichinger E, Pfaffl MW, Steinlein OK and Pfob M.
  TITLE     Nicotinic acetylcholine receptor subunits alpha4 and alpha5
            associated with smoking behaviour and lung cancer are regulated by
            upstream open reading frames
  JOURNAL   PLoS ONE 8 (7), e66157 (2013)
   PUBMED   23843950
  REMARK    GeneRIF: Nicotinic acetylcholine receptor subunits alpha4 and
            alpha5 associated with smoking behavior and lung cancer are
            regulated by upstream open reading frames.
            Publication Status: Online-Only
REFERENCE   6  (bases 1 to 5543)
  AUTHORS   Phillips,H.A., Scheffer,I.E., Berkovic,S.F., Hollway,G.E.,
            Sutherland,G.R. and Mulley,J.C.
  TITLE     Localization of a gene for autosomal dominant nocturnal frontal
            lobe epilepsy to chromosome 20q 13.2
  JOURNAL   Nat. Genet. 10 (1), 117-118 (1995)
   PUBMED   7647781
REFERENCE   7  (bases 1 to 5543)
  AUTHORS   Kurahashi,H. and Hirose,S.
  TITLE     Autosomal Dominant Nocturnal Frontal Lobe Epilepsy
  JOURNAL   (in) Adam MP, Ardinger HH, Pagon RA, Wallace SE, Bean LJH, Stephens
            K and Amemiya A (Eds.);
            GENEREVIEWS((R));
            (1993)
   PUBMED   20301348
REFERENCE   8  (bases 1 to 5543)
  AUTHORS   Pilz AJ, Willer E, Povey S and Abbott CM.
  TITLE     The genes coding for phosphoenolpyruvate carboxykinase-1 (PCK1) and
            neuronal nicotinic acetylcholine receptor alpha 4 subunit (CHRNA4)
            map to human chromosome 20, extending the known region of homology
            with mouse chromosome 2
  JOURNAL   Ann. Hum. Genet. 56 (4), 289-293 (1992)
   PUBMED   1492743
REFERENCE   9  (bases 1 to 5543)
  AUTHORS   Anand R and Lindstrom J.
  TITLE     Chromosomal localization of seven neuronal nicotinic acetylcholine
            receptor subunit genes in humans
  JOURNAL   Genomics 13 (4), 962-967 (1992)
   PUBMED   1505988
REFERENCE   10 (bases 1 to 5543)
  AUTHORS   McLane KE, Wu XD and Conti-Tronconi BM.
  TITLE     Identification of a brain acetylcholine receptor alpha subunit able
            to bind alpha-bungarotoxin
  JOURNAL   J. Biol. Chem. 265 (17), 9816-9824 (1990)
   PUBMED   2351675
COMMENT     REVIEWED REFSEQ: This record has been curated by NCBI staff. The
            reference sequence was derived from U62433.1, AL121827.34,
            L35901.1, BC096291.1, AK296430.1 and AB209359.1.
            This sequence is a reference standard in the RefSeqGene project.
            On Feb 10, 2012 this sequence version replaced NM_000744.5.
            
            Summary: This gene encodes a nicotinic acetylcholine receptor,
            which belongs to a superfamily of ligand-gated ion channels that
            play a role in fast signal transmission at synapses. These
            pentameric receptors can bind acetylcholine, which causes an
            extensive change in conformation that leads to the opening of an
            ion-conducting channel across the plasma membrane. This protein is
            an integral membrane receptor subunit that can interact with either
            nAChR beta-2 or nAChR beta-4 to form a functional receptor.
            Mutations in this gene cause nocturnal frontal lobe epilepsy type
            1. Polymorphisms in this gene that provide protection against
            nicotine addiction have been described. Alternative splicing
            results in multiple transcript variants. [provided by RefSeq, Feb
            2012].
            
            Transcript Variant: This variant (1) encodes the longer isoform
            (1).
            
            Sequence Note: This RefSeq record was created from transcript and
            genomic sequence data to make the sequence consistent with the
            reference genome assembly. The genomic coordinates used for the
            transcript record were based on transcript alignments.
            
            Publication Note:  This RefSeq record includes a subset of the
            publications that are available for this gene. Please see the Gene
            record to access additional publications.
            
            ##Evidence-Data-START##
            Transcript exon combination :: U62433.1, BC096290.1 [ECO:0000332]
            RNAseq introns              :: single sample supports all introns
                                           SAMEA1968968, SAMEA2145743
                                           [ECO:0000348]
            ##Evidence-Data-END##
            
            ##RefSeq-Attributes-START##
            regulatory uORF :: PMID: 23843950
            ##RefSeq-Attributes-END##
PRIMARY     REFSEQ_SPAN         PRIMARY_IDENTIFIER PRIMARY_SPAN        COMP
            1-48                U62433.1           1-48
            49-53               AL121827.34        130846-130850       c
            54-1859             L35901.1           1-1806
            1860-2259           BC096291.1         1676-2075
            2260-2648           AK296430.1         531-919
            2649-2649           AL121827.34        115706-115706       c
            2650-4784           AB209359.1         3067-5201
            4785-5543           AL121827.34        112812-113570       c
FEATURES             Location/Qualifiers
     source          1..5543
                     /organism="Homo sapiens"
                     /mol_type="mRNA"
                     /db_xref="taxon:9606"
                     /chromosome="20"
                     /map="20q13.33"
     gene            1..5543
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /note="cholinergic receptor nicotinic alpha 4 subunit"
                     /db_xref="GeneID:1137"
                     /db_xref="HGNC:HGNC:1958"
                     /db_xref="MIM:118504"
     exon            1..307
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /inference="alignment:Splign:2.1.0"
     regulatory      151..210
                     /regulatory_class="other"
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="DESCRIPTION:regulatory uORF[PMID:23843950]"
                     /note="regulatory uORF; this uORF is predicted to encode a
                     19 aa peptide (PMID:23843950)"
     misc_feature    208..210
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /note="upstream in-frame stop codon"
     CDS             232..2115
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /note="isoform 1 precursor is encoded by transcript
                     variant 1; cholinergic receptor, nicotinic, alpha
                     polypeptide 4; neuronal acetylcholine receptor subunit
                     alpha-4; neuronal nicotinic acetylcholine receptor alpha-4
                     subunit; cholinergic receptor, nicotinic, alpha 4
                     (neuronal); cholinergic receptor, nicotinic alpha 4"
                     /codon_start=1
                     /product="neuronal acetylcholine receptor subunit alpha-4
                     isoform 1 precursor"
                     /protein_id="NP_000735.1"
                     /db_xref="CCDS:CCDS13517.1"
                     /db_xref="GeneID:1137"
                     /db_xref="HGNC:HGNC:1958"
                     /db_xref="MIM:118504"
                     /translation="MELGGPGAPRLLPPLLLLLGTGLLRASSHVETRAHAEERLLKKL
                     FSGYNKWSRPVANISDVVLVRFGLSIAQLIDVDEKNQMMTTNVWVKQEWHDYKLRWDP
                     ADYENVTSIRIPSELIWRPDIVLYNNADGDFAVTHLTKAHLFHDGRVQWTPPAIYKSS
                     CSIDVTFFPFDQQNCTMKFGSWTYDKAKIDLVNMHSRVDQLDFWESGEWVIVDAVGTY
                     NTRKYECCAEIYPDITYAFVIRRLPLFYTINLIIPCLLISCLTVLVFYLPSECGEKIT
                     LCISVLLSLTVFLLLITEIIPSTSLVIPLIGEYLLFTMIFVTLSIVITVFVLNVHHRS
                     PRTHTMPTWVRRVFLDIVPRLLLMKRPSVVKDNCRRLIESMHKMASAPRFWPEPEGEP
                     PATSGTQSLHPPSPSFCVPLDVPAEPGPSCKSPSDQLPPQQPLEAEKASPHPSPGPCR
                     PPHGTQAPGLAKARSLSVQHMSSPGEAVEGGVRCRSRSIQYCVPRDDAAPEADGQAAG
                     ALASRNTHSAELPPPDQPSPCKCTCKKEPSSVSPSATVKTRSTKAPPPHLPLSPALTR
                     AVEGVQYIADHLKAEDTDFSVKEDWKYVAMVIDRIFLWMFIIVCLLGTVGLFLPPWLA
                     GMI"
     sig_peptide     232..315
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="{ECO:0000255}; propagated from UniProtKB/Swiss-Prot
                     (P43681.2)"
     mat_peptide     316..2112
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /product="Neuronal acetylcholine receptor subunit alpha-4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="propagated from UniProtKB/Swiss-Prot (P43681.2)"
     misc_feature    958..1032
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="propagated from UniProtKB/Swiss-Prot (P43681.2);
                     transmembrane region"
     misc_feature    1054..1110
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="propagated from UniProtKB/Swiss-Prot (P43681.2);
                     transmembrane region"
     misc_feature    1156..1221
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="propagated from UniProtKB/Swiss-Prot (P43681.2);
                     transmembrane region"
     misc_feature    1501..1503
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="Phosphoserine. {ECO:0000250|UniProtKB:P09483};
                     propagated from UniProtKB/Swiss-Prot (P43681.2); other
                     site"
     misc_feature    1843..1845
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="Phosphoserine. {ECO:0000250|UniProtKB:O70174};
                     propagated from UniProtKB/Swiss-Prot (P43681.2); other
                     site"
     misc_feature    1852..1854
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="Phosphoserine. {ECO:0000250|UniProtKB:O70174};
                     propagated from UniProtKB/Swiss-Prot (P43681.2); other
                     site"
     misc_feature    2032..2088
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /experiment="experimental evidence, no additional details
                     recorded"
                     /note="propagated from UniProtKB/Swiss-Prot (P43681.2);
                     transmembrane region"
     exon            308..459
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /inference="alignment:Splign:2.1.0"
     exon            460..504
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /inference="alignment:Splign:2.1.0"
     exon            505..614
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /inference="alignment:Splign:2.1.0"
     exon            615..1989
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /inference="alignment:Splign:2.1.0"
     exon            1990..5543
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /inference="alignment:Splign:2.1.0"
     STS             2960..3821
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="CHRNA4_1625"
                     /db_xref="UniSTS:280498"
     STS             3362..3517
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="RH70691"
                     /db_xref="UniSTS:91561"
     STS             3690..3819
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="D20S127E"
                     /db_xref="UniSTS:77874"
     STS             3712..3824
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="RH64135"
                     /db_xref="UniSTS:85131"
     STS             4237..4416
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="RH75305"
                     /db_xref="UniSTS:85424"
     STS             4579..4703
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="RH12050"
                     /db_xref="UniSTS:90455"
     regulatory      4758..4763
                     /regulatory_class="polyA_signal_sequence"
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
     polyA_site      4785
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
     STS             5239..5380
                     /gene="CHRNA4"
                     /gene_synonym="BFNC; EBN; EBN1; NACHR; NACHRA4; NACRA4"
                     /standard_name="RH78607"
                     /db_xref="UniSTS:87070"
ORIGIN      
        1 tcccagccgg ctgaggcggg cagggccggg cggggccgcg ccacggagcc cacagcccgg
       61 cgctccctgc cgcgccgccg ccgcaccgcg ccccacagga gaagacgaac cgggcccggc
      121 ggccgaagcg gcccgcgagg cgcgggaggc atgaagttgg gcgcgcacgg gcctcgaagc
      181 ggcggggagc cgggagccgc ccgcatctag agcccgcgag gtgcgtgcgc catggagcta
      241 gggggccccg gagcgccgcg gctgctgccg ccgctgctgc tgcttctggg gaccggcctc
      301 ctgcgcgcca gcagccatgt ggagacccgg gcccacgccg aggagcggct cctgaagaaa
      361 ctcttctccg gttacaacaa gtggtcccga cccgtggcca acatctcgga cgtggtcctc
      421 gtccgcttcg gcctgtccat cgctcagctc attgacgtgg atgagaagaa ccagatgatg
      481 accacgaacg tatgggtgaa gcaggagtgg cacgactaca agctgcgctg ggacccagct
      541 gactatgaga atgtcacctc catccgcatc ccctccgagc tcatctggcg gccggacatc
      601 gtcctctaca acaatgctga cggggacttc gcggtcaccc acctgaccaa ggcccacctg
      661 ttccatgacg ggcgggtgca gtggactccc ccggccattt acaagagctc ctgcagcatc
      721 gacgtcacct tcttcccctt cgaccagcag aactgcacca tgaaattcgg ctcctggacc
      781 tacgacaagg ccaagatcga cctggtgaac atgcacagcc gcgtggacca gctggacttc
      841 tgggagagtg gcgagtgggt catcgtggat gccgtgggca cctacaacac caggaagtac
      901 gagtgctgtg ccgagatcta cccggacatc acctatgcct tcgtcatccg gcggctgccg
      961 ctcttctaca ccatcaacct catcatcccc tgcctgctca tctcctgcct caccgtgctg
     1021 gtcttctacc tgccctccga gtgtggcgag aagatcacgc tgtgcatctc cgtgctgctg
     1081 tcgctcaccg tcttcctgct gctcatcacc gagatcatcc cgtccacctc actggtcatc
     1141 ccactcatcg gcgagtacct gctgttcacc atgatcttcg tcaccctgtc catcgtcatc
     1201 acggtcttcg tgctcaacgt gcaccaccgc tcgccacgca cgcacaccat gcccacctgg
     1261 gtacgcaggg tcttcctgga catcgtgcca cgcctgctcc tcatgaagcg gccgtccgtg
     1321 gtcaaggaca attgccggcg gctcatcgag tccatgcata agatggccag tgccccgcgc
     1381 ttctggcccg agccagaagg ggagccccct gccacgagcg gcacccagag cctgcacccg
     1441 ccctcaccgt ccttctgtgt ccccctggat gtgccggctg agcctgggcc ttcctgcaag
     1501 tcaccctccg accagctccc tcctcagcag cccctggaag ctgagaaagc cagcccccac
     1561 ccctcgcctg gaccctgccg cccgccccac ggcacccagg caccagggct ggccaaagcc
     1621 aggtccctca gcgtccagca catgtccagc cctggcgaag cggtggaagg cggcgtccgg
     1681 tgccggtctc ggagcatcca gtactgtgtt ccccgagacg atgccgcccc cgaggcagat
     1741 ggccaggctg ccggcgccct ggcctctcgc aacacccact cggctgagct cccaccccca
     1801 gaccagccct ctccgtgcaa atgcacatgc aagaaggagc cctcttcggt gtccccgagc
     1861 gccacggtca agacccgcag caccaaagcg ccgcccccgc acctgcccct gtcgccggcc
     1921 ctgacccggg cggtggaggg cgtccagtac attgcagacc acctgaaggc cgaagacaca
     1981 gacttctcgg tgaaggagga ctggaagtac gtggccatgg tcatcgaccg catcttcctc
     2041 tggatgttca tcatcgtctg cctgctgggg acggtgggcc tcttcctgcc gccctggctg
     2101 gctggcatga tctaggaagg gaccgggagc ctgcgtggcc tggggctgcc gtgcacgggg
     2161 ccagcatcca tgcggccggc ctggggccgg gctggcttct ccctggactc tgtggggcca
     2221 cacgtttgcc aaattttcct tcctgttctg tgtctgctgt aagacggcct tggacgggga
     2281 cacggcctct ggggagaccg agtgtggagc tgcttccagt tggactgtgg cctcaggagg
     2341 cagtggcttg gagcagaggt gggggtcggc gccttctacc tgcaggactc gggctaagtc
     2401 cagttcaaga gtctcctgag ctcccctgca gatggaactc actcctccca gtctctcagc
     2461 agatctggtg atggtcccga ccgccccaag gggcccttga agggagttca cgccccttca
     2521 gccccaagcg ccccgtcttc tggaggcccg gccgcctctc ccacccaccg tgtcctgcgc
     2581 ctgtgtgggc ttcaacttct cccccagggt ccttgagcct ctcgggcccc atgagggtga
     2641 gccagcagcg ctccaatctg cttcgctagg agagggtcca ggcagggatt gtgtttggtt
     2701 gtgttttggg taagttagtg agaactcaag tcttgtgcct aaggagcctt ggagaacaga
     2761 gttttgtgga gctggttccg cgtggggaga atcgggaggc ccacgcggga ccagctctcc
     2821 cctgcgcagg ccctccgcgg gggacaggaa cacccagccc cagcgagtct ggagaccagg
     2881 actctgcctt ccaggcgtag ggccagggct ctggcaggtg gccagggctc cacgggggcc
     2941 tagtggcttc agcccctggg gtacttctgt gttgtgattc cccggagctg ggaaggtccc
     3001 gaatggagtc cagacctggg ccctggttcc cccaggaccc tgagggtttc caccttggcg
     3061 cgcagcccgg gagatccgcc ctgggctctg ggttcgggaa gaaggacttc ctgctacagt
     3121 agctgtgggg agctggtggg ggcatccttg aggacctcca cctgggagat gctgggaccc
     3181 tcggggcagg aagtccctga gaagcctcat gggagtcagg gagccctggg gtttccacac
     3241 aggcccatgc cctccgtcct ggcagggcag gcagagctca gcacagcctc acccctgcag
     3301 gcggtatcca gaggtgaggg aggcctgaaa tgtttccagg catgaccctg gagcccggca
     3361 gtgcaccccc taaagatggc gcacccggca gccccccagt gtccccaggg gcacacttcc
     3421 ccttgggatg ggcacaggct gccccacccc tccatgattc caagggccca gaggggcggg
     3481 gccaggatgg cgtgtcccct gcctgtgagt gacatcggtt caggaggaga cagtcaggaa
     3541 gcctcctgct gagtggtcca cattctgctg cccccagacc ccatccagcc aggggtgggg
     3601 atggggttgg gctctgcgtc ccactgagtc tcattcctct gtccccgagc cgagctctcc
     3661 tgggccaggg tctcgtcagg aggtgcctga gagcagaatg aataattgag gttaggaacc
     3721 cggcatgccg agtgccccag aaatgccgct gtgtccccgc gggcagtgac gtgagtgggg
     3781 aggagactca ggcccacatt gcccacacct gcctctgaac tgctgctggt cacccccacc
     3841 cccgggtgcc tgtgaccggg gtcctgaggc tggggctttt gtgccaggag tgggtgggac
     3901 acagagaccc ccgtgccaaa tcctgggcat ccccagcccc ctggcccctc ccacatcccc
     3961 cgtgtattca ggaccccatc tgctgagctc tgacctgccc ccgctctgcc atcagcccct
     4021 gctgctcctc ggccagagct cccaggacgc agggagaccc tgggagagtg aggtcatccc
     4081 ggggccatgg aagctggcag gagacactgg tctcagagga gggatagaga cgcagatttc
     4141 agatgtaaga agcccgatcc acattgcata agtggaccca gctgccagat gtgtgggcag
     4201 ctgcagggag tggcccgggg tgctgggccc agctgtggtt gttcttttct gggtcaggag
     4261 tgactgcgga gtggctggga cgctcctgcc gggggaccct cctgccgggg gtggccgctt
     4321 gcctggggga agaccccttt ccctttcttt ttttttttga ggctcaaaaa aaaaatccaa
     4381 ataaagcacc atattcccac cacgccaaat taaccttgac aacaatgttg gcgtatttgc
     4441 gtccagtgct gtttttatta attaaggtaa aatttacata tggtgagatg catgctcttc
     4501 agtggagacc cagcccctag aaggtccctc cagcccttcc tgtcggcgtc cctccccatc
     4561 tgtggctctg acttaagcac cgtggattga cctctccggc tcacgacctt cgaggagtca
     4621 gaatcctgca gttggagcct ttgatgtctg gcttcttcat cctatgactt agagcttcat
     4681 ccgtgctgtt gtgtgtgtcc gtagctcctt gctgtctgtt ccttcgtttt taattgtgag
     4741 cagtgttctt ctgtctgaat aaactaagac gcagtttgtc catcacctgt tagtggactt
     4801 tgcttgcagc tgtcggtgat taggaatgga ggtgccggtg tgtgcatggc actctatccg
     4861 ttatgcatgt ttattttctg catctgttaa tgttttaaca tctttcagac ccaggactgc
     4921 tcctcctagg gccagttagt tcccgggggt ggacagcttg cctagaaccc accttgccag
     4981 ggctgtgacc cccgaccccc tatcaaagtc tcacactaag ccaacgtttc cccgccctgt
     5041 gccaggcacc aggcagcggg agacactgct gcagctcgaa gcctgccggg gcgccggccc
     5101 ctctgcggct gcttgcgcca tcctgccccc tccttcccag gggaggcccc agtgaaggcc
     5161 ctggccaggg cttcccagcc ccttctgcct ccggaccggc ctgggtgctt cccgtgtggc
     5221 ccggcctggg gcgacgtggc cctcctctca ggaagcgtgg taatctctca atggctggcc
     5281 tccggagttg tcactcaggc tcccttgtaa gttaaggccg ggcccaggac actctagggc
     5341 acgcagctgc tgtgggactg ctgggttagg tgtcccctgg agcagaggcg gcttcggatt
     5401 tctaaagtgg ctgagtcctt tcaccttgca ccagaaatgc tggatcaggc gcagcacaca
     5461 cagcacctgg ggatgcaagc ccggccagcc ccatggacca agccccgccg accgtgtggc
     5521 ccgccgagcc caggtgcggc cgc
//

Entrez.efetch(), rettype="fasta"

# Fetch the same accession again, this time as a plain-text FASTA record.
fasta_handle = Entrez.efetch(db="nucleotide", id="NM_000744.6", rettype="fasta", retmode="text")
print(fasta_handle.read())
fasta_handle.close()  # the whole record has been read; release the handle
>NM_000744.6 Homo sapiens cholinergic receptor nicotinic alpha 4 subunit (CHRNA4), transcript variant 1, mRNA
TCCCAGCCGGCTGAGGCGGGCAGGGCCGGGCGGGGCCGCGCCACGGAGCCCACAGCCCGGCGCTCCCTGC
CGCGCCGCCGCCGCACCGCGCCCCACAGGAGAAGACGAACCGGGCCCGGCGGCCGAAGCGGCCCGCGAGG
CGCGGGAGGCATGAAGTTGGGCGCGCACGGGCCTCGAAGCGGCGGGGAGCCGGGAGCCGCCCGCATCTAG
AGCCCGCGAGGTGCGTGCGCCATGGAGCTAGGGGGCCCCGGAGCGCCGCGGCTGCTGCCGCCGCTGCTGC
TGCTTCTGGGGACCGGCCTCCTGCGCGCCAGCAGCCATGTGGAGACCCGGGCCCACGCCGAGGAGCGGCT
CCTGAAGAAACTCTTCTCCGGTTACAACAAGTGGTCCCGACCCGTGGCCAACATCTCGGACGTGGTCCTC
GTCCGCTTCGGCCTGTCCATCGCTCAGCTCATTGACGTGGATGAGAAGAACCAGATGATGACCACGAACG
TATGGGTGAAGCAGGAGTGGCACGACTACAAGCTGCGCTGGGACCCAGCTGACTATGAGAATGTCACCTC
CATCCGCATCCCCTCCGAGCTCATCTGGCGGCCGGACATCGTCCTCTACAACAATGCTGACGGGGACTTC
GCGGTCACCCACCTGACCAAGGCCCACCTGTTCCATGACGGGCGGGTGCAGTGGACTCCCCCGGCCATTT
ACAAGAGCTCCTGCAGCATCGACGTCACCTTCTTCCCCTTCGACCAGCAGAACTGCACCATGAAATTCGG
CTCCTGGACCTACGACAAGGCCAAGATCGACCTGGTGAACATGCACAGCCGCGTGGACCAGCTGGACTTC
TGGGAGAGTGGCGAGTGGGTCATCGTGGATGCCGTGGGCACCTACAACACCAGGAAGTACGAGTGCTGTG
CCGAGATCTACCCGGACATCACCTATGCCTTCGTCATCCGGCGGCTGCCGCTCTTCTACACCATCAACCT
CATCATCCCCTGCCTGCTCATCTCCTGCCTCACCGTGCTGGTCTTCTACCTGCCCTCCGAGTGTGGCGAG
AAGATCACGCTGTGCATCTCCGTGCTGCTGTCGCTCACCGTCTTCCTGCTGCTCATCACCGAGATCATCC
CGTCCACCTCACTGGTCATCCCACTCATCGGCGAGTACCTGCTGTTCACCATGATCTTCGTCACCCTGTC
CATCGTCATCACGGTCTTCGTGCTCAACGTGCACCACCGCTCGCCACGCACGCACACCATGCCCACCTGG
GTACGCAGGGTCTTCCTGGACATCGTGCCACGCCTGCTCCTCATGAAGCGGCCGTCCGTGGTCAAGGACA
ATTGCCGGCGGCTCATCGAGTCCATGCATAAGATGGCCAGTGCCCCGCGCTTCTGGCCCGAGCCAGAAGG
GGAGCCCCCTGCCACGAGCGGCACCCAGAGCCTGCACCCGCCCTCACCGTCCTTCTGTGTCCCCCTGGAT
GTGCCGGCTGAGCCTGGGCCTTCCTGCAAGTCACCCTCCGACCAGCTCCCTCCTCAGCAGCCCCTGGAAG
CTGAGAAAGCCAGCCCCCACCCCTCGCCTGGACCCTGCCGCCCGCCCCACGGCACCCAGGCACCAGGGCT
GGCCAAAGCCAGGTCCCTCAGCGTCCAGCACATGTCCAGCCCTGGCGAAGCGGTGGAAGGCGGCGTCCGG
TGCCGGTCTCGGAGCATCCAGTACTGTGTTCCCCGAGACGATGCCGCCCCCGAGGCAGATGGCCAGGCTG
CCGGCGCCCTGGCCTCTCGCAACACCCACTCGGCTGAGCTCCCACCCCCAGACCAGCCCTCTCCGTGCAA
ATGCACATGCAAGAAGGAGCCCTCTTCGGTGTCCCCGAGCGCCACGGTCAAGACCCGCAGCACCAAAGCG
CCGCCCCCGCACCTGCCCCTGTCGCCGGCCCTGACCCGGGCGGTGGAGGGCGTCCAGTACATTGCAGACC
ACCTGAAGGCCGAAGACACAGACTTCTCGGTGAAGGAGGACTGGAAGTACGTGGCCATGGTCATCGACCG
CATCTTCCTCTGGATGTTCATCATCGTCTGCCTGCTGGGGACGGTGGGCCTCTTCCTGCCGCCCTGGCTG
GCTGGCATGATCTAGGAAGGGACCGGGAGCCTGCGTGGCCTGGGGCTGCCGTGCACGGGGCCAGCATCCA
TGCGGCCGGCCTGGGGCCGGGCTGGCTTCTCCCTGGACTCTGTGGGGCCACACGTTTGCCAAATTTTCCT
TCCTGTTCTGTGTCTGCTGTAAGACGGCCTTGGACGGGGACACGGCCTCTGGGGAGACCGAGTGTGGAGC
TGCTTCCAGTTGGACTGTGGCCTCAGGAGGCAGTGGCTTGGAGCAGAGGTGGGGGTCGGCGCCTTCTACC
TGCAGGACTCGGGCTAAGTCCAGTTCAAGAGTCTCCTGAGCTCCCCTGCAGATGGAACTCACTCCTCCCA
GTCTCTCAGCAGATCTGGTGATGGTCCCGACCGCCCCAAGGGGCCCTTGAAGGGAGTTCACGCCCCTTCA
GCCCCAAGCGCCCCGTCTTCTGGAGGCCCGGCCGCCTCTCCCACCCACCGTGTCCTGCGCCTGTGTGGGC
TTCAACTTCTCCCCCAGGGTCCTTGAGCCTCTCGGGCCCCATGAGGGTGAGCCAGCAGCGCTCCAATCTG
CTTCGCTAGGAGAGGGTCCAGGCAGGGATTGTGTTTGGTTGTGTTTTGGGTAAGTTAGTGAGAACTCAAG
TCTTGTGCCTAAGGAGCCTTGGAGAACAGAGTTTTGTGGAGCTGGTTCCGCGTGGGGAGAATCGGGAGGC
CCACGCGGGACCAGCTCTCCCCTGCGCAGGCCCTCCGCGGGGGACAGGAACACCCAGCCCCAGCGAGTCT
GGAGACCAGGACTCTGCCTTCCAGGCGTAGGGCCAGGGCTCTGGCAGGTGGCCAGGGCTCCACGGGGGCC
TAGTGGCTTCAGCCCCTGGGGTACTTCTGTGTTGTGATTCCCCGGAGCTGGGAAGGTCCCGAATGGAGTC
CAGACCTGGGCCCTGGTTCCCCCAGGACCCTGAGGGTTTCCACCTTGGCGCGCAGCCCGGGAGATCCGCC
CTGGGCTCTGGGTTCGGGAAGAAGGACTTCCTGCTACAGTAGCTGTGGGGAGCTGGTGGGGGCATCCTTG
AGGACCTCCACCTGGGAGATGCTGGGACCCTCGGGGCAGGAAGTCCCTGAGAAGCCTCATGGGAGTCAGG
GAGCCCTGGGGTTTCCACACAGGCCCATGCCCTCCGTCCTGGCAGGGCAGGCAGAGCTCAGCACAGCCTC
ACCCCTGCAGGCGGTATCCAGAGGTGAGGGAGGCCTGAAATGTTTCCAGGCATGACCCTGGAGCCCGGCA
GTGCACCCCCTAAAGATGGCGCACCCGGCAGCCCCCCAGTGTCCCCAGGGGCACACTTCCCCTTGGGATG
GGCACAGGCTGCCCCACCCCTCCATGATTCCAAGGGCCCAGAGGGGCGGGGCCAGGATGGCGTGTCCCCT
GCCTGTGAGTGACATCGGTTCAGGAGGAGACAGTCAGGAAGCCTCCTGCTGAGTGGTCCACATTCTGCTG
CCCCCAGACCCCATCCAGCCAGGGGTGGGGATGGGGTTGGGCTCTGCGTCCCACTGAGTCTCATTCCTCT
GTCCCCGAGCCGAGCTCTCCTGGGCCAGGGTCTCGTCAGGAGGTGCCTGAGAGCAGAATGAATAATTGAG
GTTAGGAACCCGGCATGCCGAGTGCCCCAGAAATGCCGCTGTGTCCCCGCGGGCAGTGACGTGAGTGGGG
AGGAGACTCAGGCCCACATTGCCCACACCTGCCTCTGAACTGCTGCTGGTCACCCCCACCCCCGGGTGCC
TGTGACCGGGGTCCTGAGGCTGGGGCTTTTGTGCCAGGAGTGGGTGGGACACAGAGACCCCCGTGCCAAA
TCCTGGGCATCCCCAGCCCCCTGGCCCCTCCCACATCCCCCGTGTATTCAGGACCCCATCTGCTGAGCTC
TGACCTGCCCCCGCTCTGCCATCAGCCCCTGCTGCTCCTCGGCCAGAGCTCCCAGGACGCAGGGAGACCC
TGGGAGAGTGAGGTCATCCCGGGGCCATGGAAGCTGGCAGGAGACACTGGTCTCAGAGGAGGGATAGAGA
CGCAGATTTCAGATGTAAGAAGCCCGATCCACATTGCATAAGTGGACCCAGCTGCCAGATGTGTGGGCAG
CTGCAGGGAGTGGCCCGGGGTGCTGGGCCCAGCTGTGGTTGTTCTTTTCTGGGTCAGGAGTGACTGCGGA
GTGGCTGGGACGCTCCTGCCGGGGGACCCTCCTGCCGGGGGTGGCCGCTTGCCTGGGGGAAGACCCCTTT
CCCTTTCTTTTTTTTTTTGAGGCTCAAAAAAAAAATCCAAATAAAGCACCATATTCCCACCACGCCAAAT
TAACCTTGACAACAATGTTGGCGTATTTGCGTCCAGTGCTGTTTTTATTAATTAAGGTAAAATTTACATA
TGGTGAGATGCATGCTCTTCAGTGGAGACCCAGCCCCTAGAAGGTCCCTCCAGCCCTTCCTGTCGGCGTC
CCTCCCCATCTGTGGCTCTGACTTAAGCACCGTGGATTGACCTCTCCGGCTCACGACCTTCGAGGAGTCA
GAATCCTGCAGTTGGAGCCTTTGATGTCTGGCTTCTTCATCCTATGACTTAGAGCTTCATCCGTGCTGTT
GTGTGTGTCCGTAGCTCCTTGCTGTCTGTTCCTTCGTTTTTAATTGTGAGCAGTGTTCTTCTGTCTGAAT
AAACTAAGACGCAGTTTGTCCATCACCTGTTAGTGGACTTTGCTTGCAGCTGTCGGTGATTAGGAATGGA
GGTGCCGGTGTGTGCATGGCACTCTATCCGTTATGCATGTTTATTTTCTGCATCTGTTAATGTTTTAACA
TCTTTCAGACCCAGGACTGCTCCTCCTAGGGCCAGTTAGTTCCCGGGGGTGGACAGCTTGCCTAGAACCC
ACCTTGCCAGGGCTGTGACCCCCGACCCCCTATCAAAGTCTCACACTAAGCCAACGTTTCCCCGCCCTGT
GCCAGGCACCAGGCAGCGGGAGACACTGCTGCAGCTCGAAGCCTGCCGGGGCGCCGGCCCCTCTGCGGCT
GCTTGCGCCATCCTGCCCCCTCCTTCCCAGGGGAGGCCCCAGTGAAGGCCCTGGCCAGGGCTTCCCAGCC
CCTTCTGCCTCCGGACCGGCCTGGGTGCTTCCCGTGTGGCCCGGCCTGGGGCGACGTGGCCCTCCTCTCA
GGAAGCGTGGTAATCTCTCAATGGCTGGCCTCCGGAGTTGTCACTCAGGCTCCCTTGTAAGTTAAGGCCG
GGCCCAGGACACTCTAGGGCACGCAGCTGCTGTGGGACTGCTGGGTTAGGTGTCCCCTGGAGCAGAGGCG
GCTTCGGATTTCTAAAGTGGCTGAGTCCTTTCACCTTGCACCAGAAATGCTGGATCAGGCGCAGCACACA
CAGCACCTGGGGATGCAAGCCCGGCCAGCCCCATGGACCAAGCCCCGCCGACCGTGTGGCCCGCCGAGCC
CAGGTGCGGCCGC

Entrez.esearch()

# Search the nucleotide database for human CHRNA4 entries.
# idtype="acc" makes the IdList hold accession.version strings (see the recorded output).
hschrna4_handle = Entrez.esearch(db="nucleotide", term="Homo sapiens[Orgn] AND CHRNA4[Gene]", idtype="acc")
hschrna4_record = Entrez.read(hschrna4_handle)
hschrna4_handle.close()  # parsing is finished; release the handle
# Total number of hits for the query (returned as a string).
hschrna4_record["Count"]
'43'
# NOTE(review): the recorded output lists 20 IDs although Count is '43' —
# presumably ESearch's default page size; pass retmax to retrieve more (confirm).
hschrna4_record["IdList"]
['NR_046317.1', 'NM_001256573.1', 'NM_000744.6', 'NG_011931.1', 'XM_024451822.1', 'XM_017027625.2', 'XM_011528524.1', 'NC_000020.11', 'AY878244.1', 'AY878243.1', 'AY816234.1', 'KR711794.1', 'KR711793.1', 'KR711792.1', 'KR711791.1', 'CM000271.1', 'CH471077.2', 'BC096293.3', 'BC096290.1', 'BC096292.1']
# nicotinic search
# NOTE(review): this call passes type="acc", whereas the CHRNA4 search earlier in
# this file uses idtype="acc". The recorded IdList for this search holds numeric
# IDs, so the keyword looks ineffective — confirm whether idtype="acc" was intended.
handle = Entrez.esearch(db="nucleotide", term="nicotinic", retmax=50, type="acc")
record = Entrez.read(handle)
handle.close()  # must be called; the bare attribute `handle.close` did nothing
# Top-level keys of the parsed ESearch result.
print(record.keys())
dict_keys(['IdList', 'TranslationSet', 'RetMax', 'RetStart', 'QueryTranslation', 'TranslationStack', 'Count'])
# IDs of the hits, capped by the retmax=50 passed to the search.
print(record['IdList'])
['1523147790', '1523145344', '1523142829', '1523133633', '1523131203', '1523116150', '1523113138', '1523112708', '1523103732', '1523092346', '1523091109', '1523080985', '1523080122', '1523069848', '1523063454', '1523057334', '1523053879', '1523051942', '1523044822', '1523024007', '1523023669', '1523018785', '1523017557', '1523016273', '1523004522', '1522997402', '1522993495', '1522982347', '1522981058', '1522971555', '1522966821', '1522962555', '1522949757', '1522944298', '1522933472', '1522931349', '1522918912', '1522872626', '1522872346', '1522866699', '1522859975', '1522856484', '1522851794', '1522850987', '1522839685', '1522832792', '1522832095', '1522824605', '1522820868', '1522809651']
# Texas AND hydra vulgaris search in the nuccore database.
handle = Entrez.esearch(db="nuccore", term="Texas AND hydra vulgaris")
record = Entrez.read(handle)
# Close the handle once the result has been parsed.
handle.close()
# No idtype was requested, so the IDs printed below are numeric.
gi_list = record["IdList"]
print(gi_list)
['1215461482', '83595176', '83595170', '83595168', '83595164', '83595162', '83595158', '83595156', '83595154', '83595152', '830260241', '830260300', '830260278', '830260045', '401709912', '380765232', '83595174', '83595172', '83595166', '83595134']

Entrez.egquery()

from Bio import Entrez
# Global query: ask every Entrez database how many records match the term.
handle = Entrez.egquery(term="Ronald Lukas")
record = Entrez.read(handle)
# Close the handle once the result has been parsed.
handle.close()
# One row per database: print its name and its hit count.
for row in record["eGQueryResult"]:
    print(row["DbName"], row["Count"])
pubmed 91
pmc 53
mesh 0
books 0
pubmedhealth 2
omim 0
ncbisearch 5
nuccore 5
nucgss 458024
nucest 64594
protein 5
genome 0
structure 0
taxonomy 0
snp 0
dbvar 5
gene 1
sra 0
biosystems 4
unigene 0
cdd 0
clone 0
popset 0
geoprofiles 0
gds 0
homologene 0
pccompound 0
pcsubstance 0
pcassay 1
nlmcatalog 1
probe 0
gap 25438
proteinclusters 0
bioproject 0
biosample 0
biocollections 0

SeqIO

import os
import Bio
from Bio import SeqIO
from Bio import Entrez
# Local cache of the GenBank record; the download is skipped when it already exists.
filename = "NM_000744.6.gbk"
if not os.path.isfile(filename):
    # Downloading...
    # rettype must be "gb" (GenBank flat file): the file is saved as .gbk and
    # parsed with the "genbank" format below. Requesting "fasta" here would
    # store a FASTA file that SeqIO.read(..., "genbank") cannot parse.
    net_handle = Entrez.efetch(db="nucleotide", id="NM_000744.6", rettype="gb", retmode="text")
    try:
        # The context manager closes the output file even if the write fails.
        with open(filename, "w") as out_handle:
            out_handle.write(net_handle.read())
    finally:
        net_handle.close()
    print("Saved")

print("Parsing...")
# SeqIO.read expects exactly one record in the file.
record = SeqIO.read(filename, "genbank")
print(record)
Parsing...
ID: NM_000744.6
Name: NM_000744
Description: Homo sapiens cholinergic receptor nicotinic alpha 4 subunit (CHRNA4), transcript variant 1, mRNA
Number of features: 29
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/data_file_division=PRI
/keywords=['RefSeq']
/structured_comment=OrderedDict([('Evidence-Data', OrderedDict([('Transcript exon combination', 'U62433.1, BC096290.1 [ECO:0000332]'), ('RNAseq introns', 'single sample supports all introns')])), ('RefSeq-Attributes', OrderedDict([('regulatory uORF', 'PMID: 23843950')]))])
/references=[Reference(title='Structural principles of distinct assemblies of the human alpha4beta2 nicotinic receptor', ...), Reference(title='The association between the nicotinic acetylcholine receptor alpha4 subunit gene (CHRNA4) rs1044396 and Internet gaming disorder in Korean male adults', ...), Reference(title='Overdominant Effect of a CHRNA4 Polymorphism on Cingulo-Opercular Network Activity and Cognitive Control', ...), Reference(title='The influence of CHRNA4, COMT, and maternal sensitivity on orienting and executive attention in 6-month-old infants', ...), Reference(title='Nicotinic acetylcholine receptor subunits alpha4 and alpha5 associated with smoking behaviour and lung cancer are regulated by upstream open reading frames', ...), Reference(title='Localization of a gene for autosomal dominant nocturnal frontal lobe epilepsy to chromosome 20q 13.2', ...), Reference(title='Autosomal Dominant Nocturnal Frontal Lobe Epilepsy', ...), Reference(title='The genes coding for phosphoenolpyruvate carboxykinase-1 (PCK1) and neuronal nicotinic acetylcholine receptor alpha 4 subunit (CHRNA4) map to human chromosome 20, extending the known region of homology with mouse chromosome 2', ...), Reference(title='Chromosomal localization of seven neuronal nicotinic acetylcholine receptor subunit genes in humans', ...), Reference(title='Identification of a brain acetylcholine receptor alpha subunit able to bind alpha-bungarotoxin', ...)]
/sequence_version=6
/comment=REVIEWED REFSEQ: This record has been curated by NCBI staff. The
reference sequence was derived from U62433.1, AL121827.34,
L35901.1, BC096291.1, AK296430.1 and AB209359.1.
This sequence is a reference standard in the RefSeqGene project.
On Feb 10, 2012 this sequence version replaced NM_000744.5.
Summary: This gene encodes a nicotinic acetylcholine receptor,
which belongs to a superfamily of ligand-gated ion channels that
play a role in fast signal transmission at synapses. These
pentameric receptors can bind acetylcholine, which causes an
extensive change in conformation that leads to the opening of an
ion-conducting channel across the plasma membrane. This protein is
an integral membrane receptor subunit that can interact with either
nAChR beta-2 or nAChR beta-4 to form a functional receptor.
Mutations in this gene cause nocturnal frontal lobe epilepsy type
1. Polymorphisms in this gene that provide protection against
nicotine addiction have been described. Alternative splicing
results in multiple transcript variants. [provided by RefSeq, Feb
2012].
Transcript Variant: This variant (1) encodes the longer isoform
(1).
Sequence Note: This RefSeq record was created from transcript and
genomic sequence data to make the sequence consistent with the
reference genome assembly. The genomic coordinates used for the
transcript record were based on transcript alignments.
Publication Note:  This RefSeq record includes a subset of the
publications that are available for this gene. Please see the Gene
record to access additional publications.
                               SAMEA1968968, SAMEA2145743
                               [ECO:0000348]
/source=Homo sapiens (human)
/organism=Homo sapiens
/topology=linear
/accessions=['NM_000744']
/molecule_type=mRNA
/date=21-OCT-2018
Seq('TCCCAGCCGGCTGAGGCGGGCAGGGCCGGGCGGGGCCGCGCCACGGAGCCCACA...CGC', IUPACAmbiguousDNA())

Manipulate a FASTA file

# Show the raw text of the multi-record FASTA file.
# The context manager closes the file as soon as it has been read.
with open('hv_nt_seq.fasta') as hv_fasta:
    hv_text = hv_fasta.read()
hv_text
'>DQ286061.1 Hydra vulgaris mitochondrial succinate dehydrogenase flavoprotein subunit mRNA, partial cds; nuclear gene for mitochondrial product\nTGTTGTGCTGTTGCTGATCGAACTGGTCATTCACTACTTCATACTCTTTATGGACAGTCATTGCGATACG\nATTGTAACTACTTTATAGAATACTTTGCATTGGATTTGTTGATGGATAAAGGAAAATGTGTTGGGATAAT\nTGCATTAAATCTTGAAGATGGATCTTTGCATAGGATTAAAGCAAAAAATACCGTCCTTGCAACCGGTGGT\nTCTGGAAGAACGTATTTCTCATGTACTTCAGCCCATACATGCACAGGAGATGGCACTGCTATGGTTACAA\nGAGCTGGTCTTGCAAATGAAGATTTAGAGTTCATTCAGTTTCATCCTACTGGTATTTATGGAGCTGGTTG\nTCTCATCACAGAAGGTTGTAGAGGAGAAGGAGGCTACTTGATTAATAGCGAGGGTGAACGCTTTATGGAA\nAGATATGCTCCTACTGCAAAGGATCTTGCCTCAAGAGATGTTGTTTCTCGATCGATGACAATTGAGATGA\nGAGAAGGGCGTGGATGTGGACCTGAAAAAGATCATGTATATTTACAATTGTCTCATCTTCCCCAAGAGAT\nACTTAAATCTCGTCTTCCTGGAATTTCTGAGACAGCCATGATATTTGCTGGTGTTGATGTAACTCGTGAT\nCCTATACCTGTTCTTCCAACTTGCCATTACAATATGGGTGGAATACCAACCAACTTTAACGGACAGGTAA\nTACAACATCATAATGGTAAAGATGTTATTGTGGAAGGTTTGTATGCTGCAGGTGAAGCTGCTTGTGCTTC\nAGTTCATGGTGCTAACCGTCTGGGAGCTAATTCTTTGCTCGATTTAGTCATATTTGGTCGTGCTTGTGCC\nTTAGATATTGCTGCCAAAAATAAGCCTGGAGACAGCATTCCTGATTTACCCAGTGATATTGGTGAAGTAT\nCTGTGGCCAATCTTGATAAGGTTCGGTTTGCCAATGGACACACACCAACTGCAAATTTGAGATTAAAGAT\nGCAAAAGATTATGCAAGGACATGCAGCTGTATTCAGGACTGGTGCTGTCTTGGCAGAAGGAGTGTCAAAG\nATTTATCAGGCTTATGATGAGTTGAAGGATCTTAAGCTTTATGACCGTGGTATGATATTGAATACAGATC\nTTGTTGAAGCTCTGGAACTTCAAAATTTAATGTTAAACTCCTGTCTAGCAATGGTGTCTGCAGAAGCCAG\nAAAAGAAAGTCGTGGTGCGCTT\n\n>DQ286057.1 Hydra vulgaris myosin heavy chain mRNA, partial 
cds\nTGACAACTTTCTGCTGTTGGGCGCCGGAAAGGTAGTGCTTTTCAAACTGTATCATTTCGTCACAAGGAAC\nAACTAAAAAATTTGCTTACTACTCTTGGAATGACTAGTCCTCATTTCGTAAGATGTATCATTCCTAATGA\nAAAGAAGGAACCAGGAGTTGTTGAGGGCCAACTTGTTCTTCATCAGTTGAGGTGTAATGGTGTCTTGGAA\nGGTATTCGCATATGTAGAAAAGGTTTTCCATCCAGAATGAATTTTCAAGATTTTAAGTTAAGGTACCAAA\nTACTAGCATCTAATGCGATCCCACCTGGTTTTATTGATGGCAAAGTAGCAGCTGAAAAATTAATTGAGGC\nACTTCAACTAGATCAAAGTGAATACAGAGTAGGAAAGACAAAAATATTCTTTAGAGCTGGTATTGTGGGA\nGAGTTAGAAGAAATGCGCGATGAGCGATTATCTAAAATTATTTCACAGTTCCAAGCATACTGTAAGGGCA\nGTATTATGCGCAGTGAATATAAAAAGATGGTGGCACAGCGTATTGGTCTAGCTGTTATTCAAAGAAATGT\nCAGAAAGTATTTATTCTTGCGCCATTGGTCTTGGTGGAAGTTGTACACTAAGGTTCAACCTTTATTGAGT\nGTTGCACGAGCAGAGGATGAAATGAGAGCAAAAGAAGAAGAGTTAGAAGCTGCTAAAGAACAATTAAAAA\nAAGATGCAGAAGCTAAGAAAAAAATGGAAGAAGAACTGACTGAGGCTATGGCTCAAAAAGAAAAACTTTA\nTGCAAGTTTACAAGCTGAGACTGACAGATTAATTACAATTGAAGACAAGCTTCTCAATCTGCAAACAGTT\nAAGGATAAACTTGAAAGTAGTCTAAATGAAGCATTAGAAAAGCTGGATGGAGAAGAACATAGTGTTTTAG\nTTCTTGAAGAAAAGATTCAAGAAGCAGAAGAAAAAATTGACGAACTTACTGAAAAGACTGAGGAACTCCA\nATCAAACATTAGTCGACTTGAAACTGAAAAACAAAATCGTGATAAACAAATTGATACCTTGAATGAAGAT\nATTCGCAAGCAAGATGAAACTATCTCTAAAATGAATGCAGAAAAGAAGCATGTAGATGAGGAGTTGAAAG\nATCGCACTGAACAACTACAGGCTGCTGAGGATAAATGCAACAACCTCAATAAAACAAAGAATAAATTAGA\nATCTTCTATTAGAGAGATTGAACAAGATTTAAAGAAAGAAAAAGACAGTAAAATGAAGTTAGAAAAAGAA\nAAAAAGAAAGTTGAGTCAGATCTTAAAGACAATCGAGATAAACTTTCAGAAACAGAAACTCGTCTAAAAG\nAAACTCAGGATCTTGTAACTAAACGAGAAAAGTCAATATCCGATTTAGAAAATGCAAAAGAAGGTCTTGA\nATCACAGATTAGTCAACTCCAAAGAAAAATACAAGAACTTCTTGCTAAAATTGAAGAATTAGAAGAAGAG\nCTTGAAAACGAAAGAAAGTTGAGGCAGAAATCAGAGCTACAAAGAAAAGAGTTAGAGTCAAGAATTGAGG\nAATTGCAAGACCAACTTGAAACAGCAGGCGGTGCTACATCAGCTCAAGTTGAAGTTGGTAAAAAACGTGA\nAGCTGAATGTAATCGCCTTAGAAAAGAGATTGAAGCCCTTAACATAGCAAATGATGCTGCCATCTCAGCT\nATTAAAGCAAAAACAAATGCTACAATAGCAGAAATTCAAGAGGAAAATGAAGCAATGAAAAAAGCAAAAG\nCAAAACTTGAGAAAGAAAAAAGTGCACTTAATAATGAATTAAATGAAACTAAAAACTCGCTTGATCAAAT\nTAAGAAGCAAAAAACTAATAGTGACAAGAACTCCCGTATGCTTGAAGAACAAATCAATGAACTAAACAGC\nAAGTTGGCTCAAGTTGATGAATTACATTCTCAAAGTGAGTCAAAGAATTCT
AAAGTTAACAGTGAGTTGT\nTGGCTCTTAACAGTCAATTGAGCGAATCAGAACATAATTTGGGAATAGCTACTAAAAATATAAAAACTTT\nAGAAAGTCAACTTGCAGAAAGTAAAAATTTTAATGAAGCTGAATCAAAGGCTAAACTTGAGAATTACAAC\nAGCTCGAATGCTTTT\n\n>DQ286058.1 Hydra vulgaris chitinase mRNA, partial cds\nATTGGTCGTTATTGCGGTCAAGGACGTTACCCATTAATGTCTTCAGTGGGTAAACTTCTTGGTGGATACG\nTTCCTCCCGTTGAGCCTACGTTCTCCCCAACCACAAAAGGACCATCAACGCCTAGCAAAAGTAGTACTGC\nCACTGATCGTCCTGCAACAAACCCCCCAACTGGAGCATGTAAGGCAATCGATGCAAGAGTAAAAGATCAA\nTGGTGTAATGATAACTGTCCCAAAGGATATTGCCCTACTGAGTTTTGTAAATGTTAAATAAATAAAAAAA\nGTGTTAACTTATTGCTCTTAAAAAAAAAAAAAAAAAAA\n\n>DQ286055.1 Hydra vulgaris putative solute carrier family 30 mRNA, partial cds\nGAAGTTTAGAATTCTTCCAGACTACTTATTAACAGATTATACTGACAGTAGATGAAAGAAGTTAGTCGGT\nTTACAAATAGTCAAGGAATTAAGATGAAATGTAAGTTTGGCCGAAATGCTACCTTTATTTTAATGTTGGT\nATAACAATGTCATTTTTTATTGTGGAGCTTGTTGTTGGTTATATGACTAAATCAATGGCATTGGTTGCTG\nACTCCTTTCAGATGTTATCGGATACAGTCTCTATTATTGTTGGCTTTGTTGCTTTTCACTGTTCGAAGCG\nTAGTGAAACCTCCAGCCGATTTACATATGGCTGGGTTCGTGCTGAAATACTTGGAGCTTTAGTTAATTCA\nGTATTTCTTGCTGCTCTTTGTTTTACAATTCTCATAGAATCATTTAAGCGGTCTGCTATTCCAGAAAGAG\nTTGAAAATCCTAAACTTGTTCTTATAGTCGGAGCAGTTGGTTTGCTTGTTAATATAATTGGGTTGTTTCT\nTTTTAATCACCACAGTAATGGCCATTCAAATAATAGTGAATCTGTTGAAAAAGGACATAATAATGAAGTT\nGTAGACAATATTGTTGCTGAATTCCCATTAGTTGATAGTAGTGAAGTGGTTATTTATGATAGTGATAAAA\nGCAATTCCCAAGTACCTCAAGTTGTAAGTAATAATGAGAATAGTAAAAAAAAATTAGGAGCATCTCGTCT\nAAATATTCGTGGAGTTTATTTGAATATTCTTGGAGATGCTTTAGGGTCAG\n\n>DQ286054.1 Hydra vulgaris putative solute carrier family 39 member 1 mRNA, partial 
cds\nTTCTTCAATATTTTAATCAACACAGAAAAAAAATATTTTGACTGTAAAAAAGAAGAATCCTTTTTTAGCT\nATGGGTTTTTATGAATTACCAGAATGGTCAGTTAAACTAATTATAATTATTATTTTATTTTTATTGGGCA\nTGATATTTGGTGTTGTACCATTAAATCTTTCACGAAGCTCCTCATTTGAAGGAAGAGTTTCTCCTACTCG\nCAATCTGCTTATTAGTTTGTCAAATTGCTTTGCTGGAGGGGTGTTTTTTAGCACTGTTATTCTTGATTTA\nTTTCCATTGGTAAAGTTAACAGTAAATAATGCACTAATATCTGTTTATATTGATACTGATTTTCCGCTAG\nGGGATTTTATTATTGGTATTGGATTTATCTTTATGTTAATTTTAGAGCATATAGTTCATTCTTGTTGCCA\nTCCTAATCAGTTATCTTATGAAGCTCCTAAAAATGTTAATAGTAACCAGGATGAATTATCATGTAATGAA\nAATAATCATCTTTTATCTCATGACAACAACTTTGATGTAGTTACTGATATTGAGATAAACACTTCAGAAA\nGACAGCTGCAACAA\n\n>DQ286051.1 Hydra vulgaris ribosomal protein S19 mRNA, partial cds\nCAGCTTTAATGGCCCTCAAGAATTTGTAAAAGCTTTTGCTGGACACCTAAAAAAAGGTAATAAATTTAAA\nGTGCCAGAATTTGTTGAGATAGTAAAAACTTCGAAAGCTTGTGAACTTGGACCTTCTGATCCTGATTGGT\nTTTATATCCGTGCTGCAGCTGTTGCTAGACATATTTATTTAAGACCAAACCTTGGTGTTGGTGCTATTCG\nTAAAATTTATGGTCGTGCTCAAAGGAATGGAACAAGGCCATCACATTCATGCTTAGGATCAGCCTCAATT\nGCTCGAAAGGTCTTACAATCTCTAGAAGCAATGAAACTTGTTACTAAAGATGCAGCAGGTGGACGTAGCT\nTAACTCCTGCAGGTCGAAGGGATATGGATCGAATAGCTGGACAGGTTGTGAACAAAGTTTAAAAATATAT\nTACAGAGTTAATATTAAAAAAAAAAAAAAAAAA\n\n>DQ286050.1 Hydra vulgaris ribosomal protein S9 mRNA, partial cds\nCATGCAATGGTCTTGTTCGTATTGGAGTACTTGATGAAGGAAGAATGAAGCTAGATTATGTTTTAGGTTT\nAAAAGTAGAAGATTTTTTGGAAAGACGTCTACAAACTCAAGTGCTTAAGTTGGGTCTCGCTAAGTCTATT\nCACCATGCTCGTGTTCTTATCCGACAAAAGCATATTAGAGTGCGAAAGCAGTTAGTCAACATCCCATCAT\nTTATCGTGAGACTTGACTCTCAAAAGCACATAGATTTCAGTACTAATTCACCATTCGGTGGTGGTCGACC\nAGGACGTGTTTCACGAAAGAACATGAAGAAAGGTGGCAGTGGAGGAAACGATGAAGAAGACGAAGATGAA\nTAGATTATATTGAAATCTGGCATGTGATTGTTTTGTTAGGCGGTTAATAAAGATCATTTGTCAAATAAAT\nCTAAATACTGTACAATAAAAAAAAAAAAAAAAAAA\n\n>DQ286049.1 Hydra vulgaris ribosomal protein S7 mRNA, partial 
cds\nACATATGGCTGGGTTCGTGAGCTAGAGAAAAAGTTTTCTGGAAAGCATGTCATTGTTGTTGGACAGAGAA\nGAATCTTGCCTAAACCCAGTCGTAAGACAAGAAATCAAAAGCAAATGAGACCAAGAAGTCGTACTCTAAC\nTGCTGTGCACGATGCCATTCTTGAAGATCTTTGTTTCCCATCGGAAATCGTTGGTAAAAGCATTCGAGTT\nAAATTAGATGGTTCAAGATTGATAAAAATAGTTTTAGAAAAAGCTCAGCAAACAAATGTTGAACATAAAC\nTTGACACGTTTGCAAATGTTTACAAGAAACTAACTGGTAAAGACACTCATTTTACTTTCGAAATATAAGT\nCATAACAGAGAAAAAAAAAAAAAAAAAA\n\n>NM_001309772.1 Hydra vulgaris phospholipid hydroperoxide glutathione peroxidase, mitochondrial-like (LOC100215761), mRNA\nATGGCTGCATCAGACCCTACAAAAGCTTCTTCTATATTTGAATTTCAAGCAAAAAGTATAGATGGTGAAG\nATATCAGTCTTTCGAAATATAAAGGTTTTGTTACACTTATTGTTAACGTGGCTAGCAAGGGTTTAACTGA\nACTCAACTATGCTCAGCTTGCTGATCTGCACACCAAGTATGCTGAGAAAGGTCTTCGAATTCTTGCTTTT\nCCTTGTAATCAGTTTGGTAACCAAGAGCCTGGTACAGATTTAGAAATAAAAGCGTTTGCATTAGCGCGAG\nGCGCCCACTATGACTTATTCAGTAAAATTGATGTTAATGGAGATAAGGCAGATCCTCTGTATAAATATTT\nGAAATCAAAGCAGAAAGGTATTTTGGGTAATAAAATCAAATGGAATTTTTCAAAGTTTATTTGTGATAAA\nAACGGTATCCCTGTTAAAAGATATGCTCCTACAACAGAACCTTTGTCATTAGTTCCAGATATCGAAAAGT\nATTTATGCCAATAA\n\n>NM_001309736.1 Hydra vulgaris superoxide dismutase [Mn], mitochondrial-like (LOC100209764), mRNA\nATGTTTTCTTTTGGAATCCACCGCCTTTCAGTTTTTCGAAAAATATCGAGAATAGCATTTGCTAATAAGC\nACACTCTTCCAGAATTGGGGTATGAATATAATGCATTGGAACCAACAATCAGCAGTCAAATTATGGAGAT\nACATCATCGCAAACACCACCAAGCTTATGTAAATAACTTAAATACAGCAGAAGAACAGTTAGCTGAAGCT\nCAGCATAAAGGAGATACGTCAAAGATTATTTCTTTAGCTCCTGCGTTAAAATTCAATGGAGGTGGGCACA\nTCAATCATTCCATTTTTTGGACTAATCTTTCGCCAAACGGTGGAGGAAAACCAACAGGTGAACTATTAGA\nAGCCATATTAAAAGACTTTGGGTCTTTTGAGGCAATGAAAACACGGTTATCGTCTCCAGCTGTTGCAGTG\nCAAGGTTCGGGTTGGGGTTGGTTGGGATACGATTCTGTCACTAAAAGACTTGCAATTACAGCTTTACCTA\nATCAAGATCCTTTGCAAGCTACTACTGGGTTAATACCGTTACTCGGTATTGATGTTTGGGAGCATGCGTA\nCTACTTGCAGTATAAGAATGTTCGTCTTGATTATGTCAACGCAATATTTAACATCATTGATTGGAAAAAT\nGTATCCGCAAGGTTTGTCGCAGCTAAATAA\n\n'
# Read a FASTA file from your computer and, for every record, print its
# identifier, the repr of its sequence, and its length (one per line).

import Bio
from Bio import SeqIO
for seq_record in SeqIO.parse("hv_nt_seq.fasta", "fasta"):
    print(seq_record.id, repr(seq_record.seq), len(seq_record), sep="\n")
DQ286061.1
Seq('TGTTGTGCTGTTGCTGATCGAACTGGTCATTCACTACTTCATACTCTTTATGGA...CTT', SingleLetterAlphabet())
1212
DQ286057.1
Seq('TGACAACTTTCTGCTGTTGGGCGCCGGAAAGGTAGTGCTTTTCAAACTGTATCA...TTT', SingleLetterAlphabet())
2115
DQ286058.1
Seq('ATTGGTCGTTATTGCGGTCAAGGACGTTACCCATTAATGTCTTCAGTGGGTAAA...AAA', SingleLetterAlphabet())
318
DQ286055.1
Seq('GAAGTTTAGAATTCTTCCAGACTACTTATTAACAGATTATACTGACAGTAGATG...CAG', SingleLetterAlphabet())
750
DQ286054.1
Seq('TTCTTCAATATTTTAATCAACACAGAAAAAAAATATTTTGACTGTAAAAAAGAA...CAA', SingleLetterAlphabet())
574
DQ286051.1
Seq('CAGCTTTAATGGCCCTCAAGAATTTGTAAAAGCTTTTGCTGGACACCTAAAAAA...AAA', SingleLetterAlphabet())
453
DQ286050.1
Seq('CATGCAATGGTCTTGTTCGTATTGGAGTACTTGATGAAGGAAGAATGAAGCTAG...AAA', SingleLetterAlphabet())
455
DQ286049.1
Seq('ACATATGGCTGGGTTCGTGAGCTAGAGAAAAAGTTTTCTGGAAAGCATGTCATT...AAA', SingleLetterAlphabet())
378
NM_001309772.1
Seq('ATGGCTGCATCAGACCCTACAAAAGCTTCTTCTATATTTGAATTTCAAGCAAAA...TAA', SingleLetterAlphabet())
504
NM_001309736.1
Seq('ATGTTTTCTTTTGGAATCCACCGCCTTTCAGTTTTTCGAAAAATATCGAGAATA...TAA', SingleLetterAlphabet())
660
from Bio import SeqIO
# Length of every record in the FASTA file, in file order.
sizes = list(map(len, SeqIO.parse("hv_nt_seq.fasta", "fasta")))
sizes
[1212, 2115, 318, 750, 574, 453, 455, 378, 504, 660]
# Summary of the record lengths: (number of records, shortest, longest).
len(sizes), min(sizes), max(sizes)
(10, 318, 2115)