changeset 5:706fe8139955 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit b5ef783237b244d684e26b1ed1cc333a8305ce3e"
author artbio
date Tue, 16 Mar 2021 23:26:58 +0000
parents c667d0ee39f5
children 4af77e1af12a
files fetch_fasta_from_NCBI.py fetch_fasta_from_NCBI.xml test-data/input_list.txt test-data/output_list.fa
diffstat 4 files changed, 561 insertions(+), 265 deletions(-) [+]
line wrap: on
line diff
--- a/fetch_fasta_from_NCBI.py	Wed Nov 29 17:38:52 2017 -0500
+++ b/fetch_fasta_from_NCBI.py	Tue Mar 16 23:26:58 2021 +0000
@@ -22,13 +22,17 @@
 (more than what they request)
 """
 import argparse
-import httplib
+import http.client
 import logging
 import re
 import sys
 import time
-import urllib
-import urllib2
+from urllib import error, parse, request
+
+
+LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
+LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
+LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
 
 
 class QueryException(Exception):
@@ -45,74 +49,67 @@
         self.base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
         self.query_string = options.query_string
         self.dbname = options.dbname
-        if options.outname:
-            self.outname = options.outname
-        else:
-            self.outname = 'NCBI_download' + '.' + self.dbname + '.fasta'
+        if options.get_fasta:
+            self.get_fasta = options.get_fasta
         self.ids = []
         self.retmax_esearch = 100000
         self.retmax_efetch = 500
-        self.count = 0
-        self.webenv = ""
-        self.query_key = ""
-        if options.get_uids:
-            self.get_uids = True
-        else:
-            self.get_uids = False
-        if options.iuds_file:
-            with open(options.iuds_file, 'r') as f:
-                self.ids.extend(f.readline().split(' '))
-
-    def dry_run(self):
-        self.get_count_value()
+        self.webenv = ''
+        self.usehistory = ''
+        self.query_key = ''
+        self.iuds_file = options.iuds_file
+        if self.iuds_file:
+            with open(self.iuds_file, 'r') as f:
+                for line in f:
+                    self.ids.append(line.rstrip())
+        self.count = len(self.ids)  # 0 if query, some value if iuds_file
 
     def retrieve(self):
         """
-        Retrieve the fasta sequences corresponding to the query
+        Retrieve the UIDs and FASTA sequences corresponding to the query
         """
-        if len(self.ids) == 0:
-            self.get_count_value()
+        if len(self.ids) == 0:  # retrieving from query (not required for file)
+            self.count = self.ecount()
+        # If no UIDs were found from query or file, exit
+        if self.count == 0:
+            self.logger.info("found no UIDs. Exiting script.")
+            sys.exit(-1)
+        if not self.iuds_file:
+            self.get_uids_list()
+            self.print_uids_list()
         else:
-            self.count = len(self.ids)
-        # If no UIDs are found exit script
-        if self.count > 0:
-            if len(self.ids) == 0:
-                self.get_uids_list()
-            if not self.get_uids:
-                try:
-                    self.get_sequences()
-                except QueryException as e:
-                    self.logger.error("Exiting script.")
-                    raise e
-            else:
-                with open(self.outname, 'w') as f:
-                    f.write('\t'.join(self.ids)+'\n')
-        else:
-            self.logger.error("No UIDs were found. Exiting script.")
-            raise Exception("")
+            # self.ids was already populated from the UID file
+            self.print_uids_list()
+        if self.get_fasta:
+            try:
+                self.get_sequences()
+            except QueryException as e:
+                self.logger.error("Exiting script.")
+                raise e
 
-    def get_count_value(self):
+    def print_uids_list(self):
+        with open("retrieved_uid_list.txt", 'w') as f:
+            f.write('\n'.join(self.ids))
+
+    def ecount(self):
         """
         just to retrieve Count (number of UIDs)
         Total number of UIDs from the retrieved set to be shown in the XML
         output (default=20). By default, ESearch only includes the first 20
         UIDs retrieved in the XML output. If usehistory is set to 'y',
         the remainder of the retrieved set will be stored on the History server
-
         http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch
         """
-        self.logger.info("retrieving data from %s" % self.base)
-        self.logger.info("for Query: %s and database: %s" %
-                         (self.query_string, self.dbname))
         querylog = self.esearch(self.dbname, self.query_string, '', '',
-                                "count")
+                                'count')
         self.logger.debug("Query response:")
         for line in querylog:
+            line = line.decode('utf-8')
             self.logger.debug(line.rstrip())
             if '</Count>' in line:
-                self.count = int(line[line.find('<Count>')+len('<Count>'):
-                                 line.find('</Count>')])
-        self.logger.info("Found %d UIDs" % self.count)
+                count = int(line.split("<Count>")[1].split("</Count>")[0])
+        self.logger.info("Found %d UIDs" % count)
+        return count
 
     def get_uids_list(self):
         """
@@ -121,22 +118,127 @@
         from http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
         """
         retmax = self.retmax_esearch
-        if (self.count > retmax):
-            num_batches = (self.count / retmax) + 1
+        self.logger.info("retmax = %s, self.count = %s" % (retmax, self.count))
+        if (int(self.count) > retmax):
+            num_batches = int(self.count / retmax) + 1
+            self.usehistory = 'y'
+            self.logger.info("Batch size for esearch action: %d UIDs" % retmax)
+            self.logger.info("Number of batches for esearch action: %s"
+                             % num_batches)
+            querylog = self.esearch(self.dbname, self.query_string, '', '', '')
+            for line in querylog:
+                line = line.decode('utf-8')
+                self.logger.debug(line.rstrip())
+                if '<WebEnv>' in line:
+                    self.webenv = line.split("<WebEnv>")[1].split(
+                                             "</WebEnv>")[0]
+            self.logger.info("Will use webenv %s" % self.webenv)
+            for n in range(num_batches):
+                querylog = self.esearch(self.dbname, self.query_string,
+                                        n*retmax, retmax, '')
+                for line in querylog:
+                    line = line.decode('utf-8')
+                    if '<Id>' in line and '</Id>' in line:
+                        uid = line.split("<Id>")[1].split("</Id>")[0]
+                        self.ids.append(uid)
+                self.logger.info("Retrieved %d UIDs" % len(self.ids))
+
         else:
-            num_batches = 1
-        self.logger.info("Batch size for esearch action: %d UIDs" % retmax)
-        self.logger.info("Number of batches for esearch action: %d " %
-                         num_batches)
-        for n in range(num_batches):
-            querylog = self.esearch(self.dbname, self.query_string, n*retmax,
+            self.logger.info("Batch size for esearch action: %d UIDs" % retmax)
+            self.logger.info("Number of batches for esearch action: 1")
+            querylog = self.esearch(self.dbname, self.query_string, 0,
                                     retmax, '')
             for line in querylog:
+                line = line.decode('utf-8')
                 if '<Id>' in line and '</Id>' in line:
-                    uid = (line[line.find('<Id>')+len('<Id>'):
-                           line.find('</Id>')])
+                    uid = line.split("<Id>")[1].split("</Id>")[0]
                     self.ids.append(uid)
             self.logger.info("Retrieved %d UIDs" % len(self.ids))
+        return self.ids
+
+    def get_sequences(self):
+        batch_size = self.retmax_efetch
+        count = self.count
+        uids_list = self.ids
+        self.logger.info("Batch size for efetch action: %d" % batch_size)
+        self.logger.info("Number of batches for efetch action: %d" %
+                         ((count / batch_size) + 1))
+        with open(self.get_fasta, 'w') as out:
+            for start in range(0, count, batch_size):
+                end = min(count, start+batch_size)
+                batch = uids_list[start:end]
+                self.logger.info("retrieving batch %d" %
+                                 ((start / batch_size) + 1))
+                try:
+                    mfasta = self.efetch(self.dbname, ','.join(batch))
+                    out.write(mfasta + '\n')
+                except QueryException as e:
+                    self.logger.error("%s" % e.message)
+                    raise e
+        request.urlcleanup()
+
+    def efetch(self, db, uid_list):
+        url = self.base + "efetch.fcgi"
+        self.logger.debug("url_efetch: %s" % url)
+        values = {'db': db,
+                  'id': uid_list,
+                  'rettype': "fasta",
+                  'retmode': "text",
+                  'usehistory': self.usehistory,
+                  'WebEnv': self.webenv}
+        data = parse.urlencode(values)
+        req = request.Request(url, data.encode('utf-8'))
+        self.logger.debug("data: %s" % str(data))
+        serverTransaction = False
+        counter = 0
+        response_code = 0
+        while not serverTransaction:
+            counter += 1
+            self.logger.info("Server Transaction Trial:  %s" % (counter))
+            try:
+                self.logger.debug("Going to open")
+                response = request.urlopen(req)
+                self.logger.debug("Going to get code")
+                response_code = response.getcode()
+                self.logger.debug("Going to read, de code was : %s",
+                                  str(response_code))
+                fasta = response.read()
+                self.logger.debug("Did all that")
+                response.close()
+                if((response_code != 200) or
+                   (b"Resource temporarily unavailable" in fasta) or
+                   (b"Error" in fasta) or (not fasta.startswith(b">"))):
+                    serverTransaction = False
+                    if (response_code != 200):
+                        self.logger.info("urlopen error: Response code is not\
+                                         200")
+                    elif ("Resource temporarily unavailable" in fasta):
+                        self.logger.info("Ressource temporarily unavailable")
+                    elif ("Error" in fasta):
+                        self.logger.info("Error in fasta")
+                    else:
+                        self.logger.info("Fasta doesn't start with '>'")
+                else:
+                    serverTransaction = True
+            except error.HTTPError as e:
+                serverTransaction = False
+                self.logger.info("urlopen error:%s, %s" % (e.code, e.read()))
+            except error.URLError as e:
+                serverTransaction = False
+                self.logger.info("urlopen error: Failed to reach a server")
+                self.logger.info("Reason :%s" % (e.reason))
+            except http.client.IncompleteRead as e:
+                serverTransaction = False
+                self.logger.info("IncompleteRead error:  %s" % (e.partial))
+            if (counter > 500):
+                serverTransaction = True
+        if (counter > 500):
+            raise QueryException({"message":
+                                  "500 Server Transaction Trials attempted for\
+                                  this batch. Aborting."})
+        fasta = self.sanitiser(self.dbname, fasta.decode('utf-8'))
+        time.sleep(0.1)
+        return fasta
 
     def esearch(self, db, term, retstart, retmax, rettype):
         url = self.base + "esearch.fcgi"
@@ -145,11 +247,13 @@
                   'term': term,
                   'rettype': rettype,
                   'retstart': retstart,
-                  'retmax': retmax}
-        data = urllib.urlencode(values)
+                  'retmax': retmax,
+                  'usehistory': self.usehistory,
+                  'WebEnv': self.webenv}
+        data = parse.urlencode(values)
         self.logger.debug("data: %s" % str(data))
-        req = urllib2.Request(url, data)
-        response = urllib2.urlopen(req)
+        req = request.Request(url, data.encode('utf-8'))
+        response = request.urlopen(req)
         querylog = response.readlines()
         response.close()
         time.sleep(1)
@@ -160,7 +264,7 @@
             return fastaseq
         regex = re.compile(r"[ACDEFGHIKLMNPQRSTVWYBZ]{49,}")
         sane_seqlist = []
-        seqlist = fastaseq.split("\n\n")
+        seqlist = fastaseq.split('\n\n')
         for seq in seqlist[:-1]:
             fastalines = seq.split("\n")
             if len(fastalines) < 2:
@@ -182,7 +286,7 @@
                                                        [:4]),
                                               fastalines[1]))
                     self.logger.info("%s download is skipped" %
-                                     (fastalines[0].split("|")[:4]))
+                                     fastalines[0].split("|")[:4])
                     continue
                 """ remove spaces and trim the header to 100 chars """
                 fastalines[0] = fastalines[0].replace(" ", "_")[:100]
@@ -202,115 +306,24 @@
         self.logger.info("clean sequences appended: %d" % (len(sane_seqlist)))
         return "\n".join(sane_seqlist)
 
-    def efetch(self, db, uid_list):
-        url = self.base + "efetch.fcgi"
-        self.logger.debug("url_efetch: %s" % url)
-        values = {'db': db,
-                  'id': uid_list,
-                  'rettype': "fasta",
-                  'retmode': "text"}
-        data = urllib.urlencode(values)
-        req = urllib2.Request(url, data)
-        self.logger.debug("data: %s" % str(data))
-        serverTransaction = False
-        counter = 0
-        response_code = 0
-        while not serverTransaction:
-            counter += 1
-            self.logger.info("Server Transaction Trial:  %s" % (counter))
-            try:
-                self.logger.debug("Going to open")
-                response = urllib2.urlopen(req)
-                self.logger.debug("Going to get code")
-                response_code = response.getcode()
-                self.logger.debug("Going to read, de code was : %s",
-                                  str(response_code))
-                fasta = response.read()
-                self.logger.debug("Did all that")
-                response.close()
-                if((response_code != 200) or
-                   ("Resource temporarily unavailable" in fasta) or
-                   ("Error" in fasta) or (not fasta.startswith(">"))):
-                    serverTransaction = False
-                    if (response_code != 200):
-                        self.logger.info("urlopen error: Response code is not\
-                                         200")
-                    elif ("Resource temporarily unavailable" in fasta):
-                        self.logger.info("Ressource temporarily unavailable")
-                    elif ("Error" in fasta):
-                        self.logger.info("Error in fasta")
-                    else:
-                        self.logger.info("Fasta doesn't start with '>'")
-                else:
-                    serverTransaction = True
-            except urllib2.HTTPError as e:
-                serverTransaction = False
-                self.logger.info("urlopen error:%s, %s" % (e.code, e.read()))
-            except urllib2.URLError as e:
-                serverTransaction = False
-                self.logger.info("urlopen error: Failed to reach a server")
-                self.logger.info("Reason :%s" % (e.reason))
-            except httplib.IncompleteRead as e:
-                serverTransaction = False
-                self.logger.info("IncompleteRead error:  %s" % (e.partial))
-            if (counter > 500):
-                serverTransaction = True
-        if (counter > 500):
-            raise QueryException({"message":
-                                  "500 Server Transaction Trials attempted for\
-                                  this batch. Aborting."})
-        fasta = self.sanitiser(self.dbname, fasta)
-        time.sleep(0.1)
-        return fasta
-
-    def get_sequences(self):
-        batch_size = 200
-        count = self.count
-        uids_list = self.ids
-        self.logger.info("Batch size for efetch action: %d" % batch_size)
-        self.logger.info("Number of batches for efetch action: %d" %
-                         ((count / batch_size) + 1))
-        with open(self.outname, 'w') as out:
-            for start in range(0, count, batch_size):
-                end = min(count, start+batch_size)
-                batch = uids_list[start:end]
-                self.logger.info("retrieving batch %d" %
-                                 ((start / batch_size) + 1))
-                try:
-                    mfasta = self.efetch(self.dbname, ','.join(batch))
-                    out.write(mfasta + '\n')
-                except QueryException as e:
-                    self.logger.error("%s" % e.message)
-                    raise e
-        urllib.urlcleanup()
-
-
-LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
-LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
-LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
-
 
 def command_parse():
     parser = argparse.ArgumentParser(description='Retrieve data from NCBI')
-    parser.add_argument('-i', dest='query_string', help='NCBI Query String')
-    parser.add_argument('--UID_list', dest='iuds_file',
-                        help='file containing a list of iuds to be fetched')
-    parser.add_argument('-o', dest='outname', help='output file name')
-    parser.add_argument('-d', dest='dbname', help='database type')
-    parser.add_argument('--count', '-c', dest='count_ids',
-                        action='store_true', default=False,
-                        help='dry run ouputing only the number of sequences\
-                        found')
-    parser.add_argument('--get_uids', '-u', dest='get_uids', default=False,
-                        action='store_true', help='prints to the output a list\
-                        of UIDs')
-    parser.add_argument('-l', '--logfile', help='log file (default=stderr)')
+    parser.add_argument('--query', '-i', dest='query_string',
+                        default=None, help='NCBI Query String')
+    parser.add_argument('--iud_file', dest='iuds_file', default=None,
+                        help='input list of iuds to be fetched')
+    parser.add_argument('--dbname', '-d', dest='dbname', help='database type')
+    parser.add_argument('--fasta', '-F', dest='get_fasta', default=False,
+                        help='file with retrieved fasta sequences')
+    parser.add_argument('--logfile', '-l',  help='log file (default=stderr)')
     parser.add_argument('--loglevel', choices=LOG_LEVELS, default='INFO',
                         help='logging level (default: INFO)')
     args = parser.parse_args()
+
     if args.query_string is not None and args.iuds_file is not None:
-        parser.error('Please choose either fetching the -i query or the -u\
-                     list.')
+        parser.error('Please choose either fetching by query (--query) \
+                      or by uid list (--iud_file)')
     return args
 
 
@@ -325,18 +338,11 @@
         kwargs['filename'] = args.logfile
     logging.basicConfig(**kwargs)
     logger = logging.getLogger('data_from_NCBI')
-
     E = Eutils(args, logger)
-    if args.count_ids:
-        try:
-            E.dry_run()
-        except Exception:
-            sys.exit(-1)
-    else:
-        try:
-            E.retrieve()
-        except Exception:
-            sys.exit(-1)
+    try:
+        E.retrieve()
+    except Exception:
+        sys.exit(-1)
 
 
 if __name__ == "__main__":
--- a/fetch_fasta_from_NCBI.xml	Wed Nov 29 17:38:52 2017 -0500
+++ b/fetch_fasta_from_NCBI.xml	Tue Mar 16 23:26:58 2021 +0000
@@ -1,111 +1,104 @@
-<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="2.3.0">
-  <description></description>
-  <command><![CDATA[
+<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="3.0.0">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="1.25.9">urllib3</requirement>
+    </requirements>
+    <command><![CDATA[
     python '$__tool_directory__'/fetch_fasta_from_NCBI.py
-        -i "$queryString"
-        -d $dbname
-        -l '$logfile'
-        -c
-        -o '$outfile';
-    #if $dry_run == ""
-        number_UIDs=\$(tail -n 2 $logfile | perl -ne '/Found (\d+) UID/ && print \$1');
-        python '$__tool_directory__'/fetch_fasta_from_NCBI.py
-            -i "$queryString"
-            -d $dbname
-            -u
-            -l '$logfile'
-            -o 'uid_outfile';
-        UID_array=( \$(head uid_outfile) );
-        array_len=\${#UID_array[@]};
-        counter=0;
-        number_of_groups=\$((array_len / 200000));
-        modulo=\$((array_len % 200000));
-        if [ "\$modulo" -gt 0 ];then
-            number_of_groups=\$((number_of_groups + 1));
-        fi;
-        group_number=1;
-        echo "----- Number of groups of batches: \$number_of_groups -----" >> $logfile;
-        for ((i=0; i+200000<array_len;i+=200000)); do
-            echo "----- Group number: \$group_number -----" >> $logfile;
-            echo "\${UID_array[@]:\$i:99999}" > uid_list_1.txt;
-            echo "\${UID_array[@]:\$((i+100000)):99999}" > uid_list_2.txt;
-            python '$__tool_directory__'/fetch_fasta_from_NCBI.py
-                -d $dbname
-                -l '$logfile'
-                -o 'tmp1_outfile'
-                --UID_list uid_list_1.txt&
-            python '$__tool_directory__'/fetch_fasta_from_NCBI.py
-                -d $dbname
-                -l 'tmp1_logfile'
-                -o 'tmp2_outfile'
-                --UID_list uid_list_2.txt&
-            wait;
-            cat tmp1_outfile tmp2_outfile>> $outfile;
-            rm tmp1_outfile tmp2_outfile;
-            cat tmp1_logfile >> $logfile;
-            rm tmp1_logfile;
-            rm uid_list_1.txt uid_list_2.txt;
-            group_number=\$((group_number + 1));
-            counter=\$(( counter + 200000 ));
-        done;
-        echo "----- Group number: \$group_number -----" >> $logfile;
-        echo "----- Last group -----" >> $logfile;
-        if [ "\$counter" -lt "\$array_len" ]; then
-            echo "\${UID_array[@]:\$counter:\$((array_len - counter + 1))}" > uid_list.txt;
-            python '$__tool_directory__'/fetch_fasta_from_NCBI.py
-                -d $dbname
-                -l '$logfile'
-                -o 'tmp_outfile'
-                --UID_list uid_list.txt;
-            rm uid_list.txt;
-            cat tmp_outfile >> $outfile;
-            rm tmp_outfile;
-        fi;
-    #end if
+        #if $query.option == 'query':
+            --query '$query.queryString'
+        #else:
+            --iud_file '$query.iud_list'
+        #end if
+        --dbname '$dbname'
+        --logfile '$logfile'
+        #if $fetch_option == 'fasta':
+            --fasta $fasta
+        #end if
   ]]></command>
+  <inputs>
 
-  <inputs>
-    <param name="queryString" type="text" size="5x80" area="True" value="txid10239[orgn] NOT txid131567[orgn] AND complete[all] NOT partial[title] NOT phage[title]" label="Query to NCBI in entrez format" help="exemple: Drosophila melanogaster[Organism] AND Gcn5[Title]">
-      <sanitizer>
-        <valid initial="string.printable">
-          <remove value="&quot;"/>
-          <remove value="\"/>
-        </valid>
-        <mapping initial="none">
-          <add source="&quot;" target="\&quot;"/>
-          <add source="\" target="\\"/>
-        </mapping>
-      </sanitizer>
-    </param>
+    <conditional name="query">
+        <param name="option" type="select" label="retrieve data from query or UID list" display="radio">
+            <option value="query" selected="true">Query string</option>
+            <option value="list">UID list</option>
+        </param>
+        <when value="query">
+            <param name="queryString" type="text" size="5x80" area="True"
+                   value=""
+                   label="Query to NCBI in entrez format"
+                   help="example: `Drosophila melanogaster[Organism] AND Gcn5[Title]`">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&quot;"/>
+                    <remove value="\"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&quot;" target="\&quot;"/>
+                    <add source="\" target="\\"/>
+                </mapping>
+            </sanitizer>
+            </param>
+        </when>
+        <when value="list">
+            <param name="iud_list" format="txt,tabular" type="data" label="A list of NCBI UIDs"
+                   help="a file with a single column of UIDs, in txt or tabular format"/>
+        </when>
+    </conditional>      
     <param name="dbname" type="select" label="NCBI database">
       <option value="nuccore">Nucleotide</option>
       <option value="protein">Protein</option>
     </param>
-    <param name="dry_run" type="boolean" label="Get only the number of sequences" truevalue="--count" falsevalue="" checked="false"/>
+    <param name="fetch_option" type="select" label="select what will be retrieved">
+      <option value="fasta" selected="true">Fasta and UIDs</option>
+      <option value="justiuds">Only UIDs</option>
+    </param>
   </inputs>
   <outputs>
-    <data name="outfile" format="fasta" label="${tool.name} (${dbname.value_label}) with queryString '${queryString.value}'" >
-      <filter> dry_run == False</filter>
+    <data name="fasta" format="fasta" label="Fasta sequences retrieved from NCBI" >
+      <filter>fetch_option == "fasta"</filter>
     </data>
-    <data format="txt" name="logfile" label="${tool.name}: log"/>
+    <data name="UIDs" format="txt" label="UIDs" from_work_dir="retrieved_uid_list.txt">
+      <filter>query['option'] == "query"</filter>
+    </data>
+    <data format="txt" name="logfile" label="logs"/>
   </outputs>
   <tests>
       <test>
           <param name="queryString" value="9629650[gi]" />
           <param name="dbname" value="nuccore" />
-          <output name="outfilename" ftype="fasta" file="output.fa" />
+          <param name="fetch_option" value="fasta"/>
+          <output name="fasta" ftype="fasta" file="output.fa" />
       </test>
       <test>
           <param name="queryString" value="CU929326[Accession]" />
           <param name="dbname" value="nuccore" />
-          <param name="date_filter" value="1"/>
-          <param name="dry_run" value="True"/>
+          <param name="fetch_option" value="justiuds"/>
           <output name="logfile" ftype="txt" file="dry_run.log" compare="sim_size"/>
       </test>
       <test>
-          <param name="queryString" value="Drosophila[Organism] AND 2014[PDAT] AND virus" />
-          <output name="outfilename" ftype="fasta" >
-              <metadata name="sequences" value="13" />
+          <param name="option" value="list" />
+          <param name="iud_list" value="input_list.txt" ftype="txt" />
+          <param name="dbname" value="nuccore" />
+          <param name="fetch_option" value="fasta"/>
+          <output name="fasta" ftype="fasta" file="output_list.fa"/>
+      </test>
+      <test>
+          <param name="queryString" value="Drosophila[Organism] AND 2017[Modification Date] AND virus" />
+          <param name="dbname" value="nuccore" />
+          <param name="fetch_option" value="fasta"/>
+          <output name="fasta" ftype="fasta" >
+              <metadata name="sequences" value="9" />
+          </output>
+      </test>
+      <test>
+          <param name="queryString" value="labalbalbalbaalablalbabal[Title]" />
+          <param name="dbname" value="nuccore" />
+          <param name="fetch_option" value="justiuds"/>
+          <output name="logfile" ftype="txt">
+              <assert_contents>
+                  <has_line_matching expression=".*Found\s+0\s+UIDs" />
+              </assert_contents>
           </output>
       </test>
   </tests>
@@ -114,7 +107,7 @@
 
 This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database (nuccore or protein) for a given entrez query.
 
-The tool is preset with "txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]" for metaVisitor use purpose
+For metaVisitor use, the tool can be run with the query "txid10239[orgn] NOT txid131567[orgn] AND complete NOT partial[title] NOT phage[title]"
 
 See `Entrez help`_ for explanation of query formats
 
@@ -126,6 +119,33 @@
 
 Retrieval progress is reported in the log dataset.
 
+**Options**::
+  <![CDATA[
+  usage: fetch_fasta_from_NCBI.py [-h] [--query QUERY_STRING]
+                                  [--iud_file IUDS_FILE] [--output OUTNAME]
+                                  [--dbname DBNAME] [--fasta GET_FASTA]
+                                  [--logfile LOGFILE]
+                                  [--loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
+  
+  Retrieve data from NCBI
+  
+  optional arguments:
+    -h, --help            show this help message and exit
+    --query QUERY_STRING, -i QUERY_STRING
+                          NCBI Query String
+    --iud_file IUDS_FILE  input list of iuds to be fetched
+    --output OUTNAME, -o OUTNAME
+                          output file name
+    --dbname DBNAME, -d DBNAME
+                          database type
+    --fasta GET_FASTA, -F GET_FASTA
+                          file with retrieved fasta sequences
+    --logfile LOGFILE, -l LOGFILE
+                          log file (default=stderr)
+    --loglevel {DEBUG,INFO,WARNING,ERROR,CRITICAL}
+                          logging level (default: INFO)
+  ]]>
+
 **Acknowledgments**
 
 This Galaxy tool has been adapted from the galaxy tool `get_fasta_from_taxon`_.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_list.txt	Tue Mar 16 23:26:58 2021 +0000
@@ -0,0 +1,14 @@
+320545794
+4426154
+4425786
+4425768
+4425122
+4307732
+4283207
+4283103
+3945034
+3941865
+2287433
+1067145390
+1067137851
+3211727
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_list.fa	Tue Mar 16 23:26:58 2021 +0000
@@ -0,0 +1,256 @@
+>NM_140329.3_Drosophila_melanogaster_Gcn5_acetyltransferase_(Gcn5),_mRNA
+ATCATTGCGCACAGCAAGTGGGCAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTT
+CAAAACTAACGAAGAGAATCGCAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAA
+AAGCCAACCAATCGATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGC
+GCAGCAACCGCTGGTGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCG
+GCGCTGGCAGTGTGCCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAA
+GGTCTTCAATCTGCCCGTGCCACAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGA
+TGCCGTTGCACCGGCTGGAAGACACCGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGG
+AGTTCAACGAGGAGTGCCGCAACACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAA
+TATATCCAGTTCCAGCATGAACGAGCTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATG
+CAGCGCGTCGAGGACGAGGACACCAAGAAGGTGTACCAGTACCTCTTCCGTTTGCTGCGCCAGTGCGTCC
+TCACGCGCCAGCAGGCGGTCATCCGGGGTCCTCTCGGCGATCCACCATTCGAGACGCCGTGCATCACCAA
+GGCTGTGCTCTCGTTGGTCTTCTACAAGTACAATCACCTGAGCACGCCTGAACTCCAAACCATGACCGAG
+GTGGCCAAGACGTTCTTGAACTTTCTGAACCACTACAACTTTGAATCGCCGTCGACCAGGAGAGGAGATC
+TCACCCACGAGGATGCCTCTAACTACAAAATCAACTACACCAGGTGGTTGGTGTTCTGCCACGTGCCCGC
+CTTCTGTAACTCCCTGCGCCAATGTGAAACGTCCCTAGTGTTTGGACGAACGCTCCTGCGCACCGTTTTC
+CAGTGCATGTCGCAGCAGCTGAAAAAGAAGTGCATCTCCGAGCGGGATCGCTTTCCCGAGGACAAGCGTT
+CGATCATCACGCTGATGCCGAAGTTTTTGGAAACGCTTCGCGCCGAATTGCTCAAGGATGATTCGCCCAT
+CTGGGATACCAGCTACCGCCCGTCAAATTCCTTTGTCATCCAGCAGAGAAAGCGTAACCAGGAGGTGGCA
+AATGTACCCATTGGTCCCAGTGCAGCAAGCATTGGTGGAAACAAGAGGACCAGTGTGGGTGAACCGCTCC
+ACAAGCGAATCAAAAAGGAGCCCACCGATCGTCCAAGCAGTGAGAATTTGGATGACCTGCCTGCGGATGT
+AGTAATGCGCGCCATGAAATCGGTGTCCGAGTCGAAGACCACCAACAAAGCGGAAATTCTCTTTCCGGTT
+AATGTGTCGCGGGATGAGAACGTCAAGGCGGAGGAGCAGAAGCGAGCCATTGAGTTCCATGTGGTGGGCA
+ATTCGCTGACCAAACCGGTGGACAAGCAGACTGTTTTGTGGCTACTGGGCCTGCAGCTCGTATTTGCCTA
+TCAGCTACCCGAGATGCCGCGCGAGTACATCAGCCAGTTGGTTTTCGACACCAAGCACAAAACACTGGCG
+CTCATCAAGGAAAACCAACCCATCGGCGGCATCTGTTTCCGCCCGTTTCCGTCGCAAGGCTTCACCGAGA
+TCGTTTTCTGCGCGGTCACCATGTCGGAACAAGTTAAGGGCTATGGCACGCACTTGATGAACCACTTGAA
+GGACTACAGCATACAGAGGGGTATCAAGCATCTGCTCACATTCGCCGATTGCGATGCCATCGGGTACTTC
+AAGAAGCAGGGTTTCTCCAAGGACATCAAATTGGCGCGACCAGTTTATGCGGGCTACATCAAGGAGTACG
+ATAGTGCTACTCTCATGCACTGCGAGCTGCATCCAAGCATTGTGAACACGCAGTTCATAGCTGTAATTCG
+CAGTCAGAGCGAGATTCTGAAGGAGCTGATCGCACAGCGCCACAACGAGGTGCAGAAAGTAAGACCCGGC
+TTGACTTGCTTCAAGGAGGGCCTGCCCGTGATTCCCGTGGAATCGATTCCTGGTCTGCGGGAGATCGGTT
+GGAAGCCGCAAAATCGTCCGGCGCGCTCATCGCGACCCCTCGAGGAATCCACCGATCCGGAGAAGCTGGC
+CACGTCCTTTGCATCCGTGCTGCAATCCGTGCGCCAGCACACCACCGCCTGGCCCTTCCTGCGCCCGGTG
+ACCGCGGCCGAAGTTCCGGACTACTACGATCACATTAAATATCCCATGGACCTGAAGACCATGGGCGAGC
+GCCTGAAGAAGGGTTACTACCAAACGCGCCGCCTGTTCATGGCGGACATGGCGCGCATTTTCTCCAACTG
+TCGGTTCTACAATTCGCCCGACACCGAGTATTATCGGTGTGCCAACTCCCTGGAGCGCTACTTCCAGACC
+AAGATGCGCGAGCTGGGGCTGTGGGACAAATGATGCAGTGATTCCGAGGAGCCCTGCATAAGGCCGAACA
+TTTAATCGAACTAAATTATTATATTAATTCTATTTATTTTTAGAATTCTCCCCAGCTAAAGAAAAACCCA
+TATAGTTAGTTTGTCGTTATAGATACTTAGGTTAAGCTCAGTGTGTCCGTCTTGTTCACAGACGCGTTGT
+GTCTTTGGGAAGAATAAATCAGATCTATCGCCTACTCATT
+>AI520300.1_LD40419.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+GCAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTTCAAAACTAACGAAGAGAATCG
+CAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATCGATGGTAA
+CAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGCTGGTGCATCT
+GGAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTGCCGGCAG
+AGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGCCCGTGCC
+ACAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCGGCTGGAAG
+ACACCGCAAGAAAATCGGCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAGGAGTGCCGCA
+ACACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTCCAGCATGAA
+CGAGCTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGCGTCCAGGACGAAGA
+>AI519932.1_LD39784.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+CAGCAAGTGGGCAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTTCAAAACTAACG
+AAGAGAATCGCAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAA
+TCGATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGC
+TGGTGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGT
+GTGCCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATC
+TGCCCGTGCCACAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCAC
+CGGCTGGAAGACACCGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAG
+GAGTGCCGCAACACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTT
+CCAGCATGAACGAGCTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGCGTTGA
+GGACGAAGACAC
+>AI519914.1_LD39759.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+CAGCAGGTGGGCAAACTCGGTGTATCGCTTGTGAAATTTAAGGAAACGTACACTTTGTTCAAAACTAACG
+AAGAGAATCGCAATATCTGCCGATCTTGGAATGGCTGGCGGTCCATCCATAACGATCAAAAGCCAACCAA
+TCGATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCATCAGCAGGCAGCGAATGGCGCACCAACCGA
+TGGTGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCATGACACGGAGGAGCTGCGTCCGGCGCTGGCAGC
+GTGCCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCACCGAAAGCACAAGGTCTTCAATC
+TGCCCGTGCCACAGAAACTGCGCCAAATTGTCGATGTATTCCGCCTGCCACTCTGAAGGATGCCGTTGCA
+CCGGCTGGAAGACACCGCACGAAAATCGCCACCGAGACGTGGAGTCCTCCTACTGTCCGGAGT
+>AI519268.1_LD38790.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+ATTTGATGTCCTTGGAGAAACCCTGCTTCTTGAAGTACCCGATGGCATCGCAATCGGCGAATGTGAGCAG
+ATGCTTGATACCCCTCTGTATGCTGTAGTCCTTCAAGTGGTTCATCAAGTGCGTGCCATAGCCCTTAACT
+TGTTCCGACATGGTGACCGCGCAGAAAACGATCTCGGTGAAGCCTTGCGACGGAAACGGGCGGAAACAGA
+TGCCGCCGATGGGTTGGTTTTCCTTGATGAGCGCCAGTGTTTTGTGCTTGGTGTCGAAAACCAACTGGCT
+GATGTACTCGCGCGGCATCTCGGGTAGCTGATAGGCAAATACGAGCTGCAGGCCCAGTAGCCACAAAACA
+GTCTGCTTGTCCACCGGTTTGGTCAGCGAATTGCCCACCACATGGAACTCAATGGCTCGCTTCTGCTCCT
+CCGCCTTGACGTTCTCATCCCGCGACACATTAACCGGAAAGAGAATTTCCGCTTTGTTGGTGGTCTTCGA
+CTCGGACACCGATTTCATGGCGCGCATTACTACATCCGCAGGCAGGTCATCCAAATTCTCACTGCTTGGA
+CGATCGGTGGGCTCCTTTTTGATTCGC
+>AI456262.1_LD36152.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+AGGAAAAGTACACTTTGTTCAAAACTAACGAAGAGAATCGCAATATCTGCCGATCTTGGAATGTCTGGTG
+GTCCATCCATAACGATAAAAAGCCAACCAATCGATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCA
+GCAGCAGGCAGCGAATGGCGCAGCAACCGCTGGTGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCAGGA
+CACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTGCCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCA
+TTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGCCCGTGCCACAGAAACTGGCCAAATTGTCGATGTATTC
+CGCCTGCCAGTCTGAGGGATGCCGTTGCACCGGCTGGAAGACACCGCAGGAAAATCGCCACCGTGACGTC
+GAGTCCTCCTACTGTCCGGAGTTCAACGAGGAGTGCCGCAACACCAGCTGTCGCCATTCGCTGAGATCGC
+ACATAGCCCATCTGGACAATATATCCAGTTCCAGCATGAACGAGCTACTGGGCGCCATTATAGACATGGA
+GAACCTATTCATGTCCATGCAGCGCGTCGAGGACGAGGACACCAAG
+>AI456802.1_LD36935.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+CAGCAAGTGGGCAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTTCAAAACTAACG
+AAGAGAATCGCAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAA
+TCGATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGC
+TGGTGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGT
+GTGCCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATC
+TGCCCGTGCCACAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCAC
+CGGCTGGAAGACACCGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAG
+GAGTGCCGCAACACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTT
+CCAGCATGAACGAGCTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGC
+>AI456789.1_LD36919.5prime_LD_Drosophila_melanogaster_embryo_pOT2_Drosophila_melanogaster_cDNA_clone
+AGAATCGCAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATCG
+ATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGCTGG
+TGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTG
+CCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGC
+CCGTGCCACAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCGG
+CTGGAAGACACCGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAGGAG
+TGCCGCAACACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTCCA
+GCATGAACGAGCTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGCGTCGAGGA
+CGAGGACACCAAG
+>AI295627.1_LP09312.5prime_LP_Drosophila_melanogaster_larval-early_pupal_pOT2_Drosophila_melanogaste
+GAGATCTCACCCACGAGGATGCCTCTAACTACAAAATCAACTACACCAGGTGGTTGGTGTTCTGCCACGT
+GCCCGCCTTCTGTAACTCCCTGCGCCAATGTGAAACGTCCCTAGTGTTTGGACGAACGCTCCTGCGCACC
+GTTTTCCAGTGCATGTCGCAGCAGCTGAAAAAGAAGTGCATCTCCGAGCGGGATCGCTTTCCCGAGGACA
+AGCGTTCGATCATCACGCTGATGCCGAAGTTTTTGGAAACGCTTCGCGCCGAATTGCTCAAGGATGATTC
+GCCCATCTGGGATACCAGCTACCGCCCGTCAAATTCCTTTGTCATCCAGCAGAGAAAGCGTAACCAGGAG
+GTGGCAAATGTACCCATTGGTCCCAGTGCAGCAAGCATTGGTGGAAACAAGAGGACCAGTGTGGGTGAAC
+CGCTCCACAAGCGAATCAAAAAGGAGCCCACCGATCGTCCAAGCAGTGAGAATTTGGATGACCTGCCTGC
+GGATGTAGTAATGCGCGCCATGAAATCGGTGTCCGAGTCGAAGACCACCAACAAAGCGGAAATTCTCTTT
+CCGGTTAATGTGTCGCGGGATGAGAACGTCAAG
+>AI292458.1_GH15365.5prime_GH_Drosophila_melanogaster_head_pOT2_Drosophila_melanogaster_cDNA_clone_G
+CAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTTCAAAACTAACGAAGAGAATCGC
+AATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATCGATGGTAAC
+AACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGCTGGTGCATCTG
+GAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTGCCGGCAGA
+GGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGCCCGTGCCA
+CAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCGGCTGGAAGA
+CACCGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAGGAGTGCCGCAA
+CACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTCCAGCATGAAC
+GAGCTACTGTGCGCCATTATAGACATGGAGAACCTATTCATGTNCATGCAGCGCGTCGAGGACGAGGACA
+CC
+>AA540999.1_LD20744.5prime_LD_Drosophila_melanogaster_embryo_BlueScript_Drosophila_melanogaster_cDNA
+GCAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTTCAAAACTAACGAAGAGAATCG
+CAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATCGATGGTAA
+CAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCACAACCGCTGGTGCATCTG
+GAGCCGCTGGCAGGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTGCCGGCAGAG
+GGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGCCCGTGCCAC
+AGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCGGCTGGAAGAC
+ACCGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAGGAGTGCCGCAAC
+ACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTTCAGCATGAACG
+AGCTACTGGGCGCCATTATAGACATGGAGA
+>KX804674.1_Synthetic_construct_clone_BS09434_Gcn5-PA_(Gcn5)_gene,_complete_cds
+GAAGTTATCAGTCGTCATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATCGATGGTAACAAC
+ACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGCTGGTGCATCTGGAG
+CCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTGCCGGCAGAGGG
+AACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGCCCGTGCCACAG
+AAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCGGCTGGAAGACAC
+CGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAGGAGTGCCGCAACAC
+CAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTCCAGCATGAACGAG
+CTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGCGTCGAGGACGAGGACACCA
+AGAAGGTGTACCAGTACCTCTTCCGTTTGCTGCGCCAGTGCGTCCTCACGCGCCAGCAGGCGGTCATCCG
+GGGTCCTCTCGGCGATCCACCATTCGAGACGCCGTGCATCACCAAGGCTGTGCTCTCGTTGGTCTTCTAC
+AAGTACAATCACCTGAGCACGCCTGAACTCCAAACCATGACCGAGGTGGCCAAGACGTTCTTGAACTTTC
+TGAACCACTACAACTTTGAATCGCCGTCGACCAGGAGAGGAGATCTCACCCACGAGGATGCCTCTAACTA
+CAAAATCAACTACACCAGGTGGTTGGTGTTCTGCCACGTGCCCGCCTTCTGTAACTCCCTGCGCCAATGT
+GAAACGTCCCTAGTGTTTGGACGAACGCTCCTGCGCACCGTTTTCCAGTGCATGTCGCAGCAGCTGAAAA
+AGAAGTGCATCTCCGAGCGGGATCGCTTTCCCGAGGACAAGCGTTCGATCATCACGCTGATGCCGAAGTT
+TTTGGAAACGCTTCGCGCCGAATTGCTCAAGGATGATTCGCCCATCTGGGATACCAGCTACCGCCCGTCA
+AATTCCTTTGTCATCCAGCAGAGAAAGCGTAACCAGGAGGTGGCAAATGTACCCATTGGTCCCAGTGCAG
+CAAGCATTGGTGGAAACAAGAGGACCAGTGTGGGTGAACCGCTCCACAAGCGAATCAAAAAGGAGCCCAC
+CGATCGTCCAAGCAGTGAGAATTTGGATGACCTGCCTGCGGATGTAGTAATGCGCGCCATGAAATCGGTG
+TCCGAGTCGAAGACCACCAACAAAGCGGAAATTCTCTTTCCGGTTAATGTGTCGCGGGATGAGAACGTCA
+AGGCGGAGGAGCAGAAGCGAGCCATTGAGTTCCATGTGGTGGGCAATTCGCTGACCAAACCGGTGGACAA
+GCAGACTGTTTTGTGGCTACTGGGCCTGCAGCTCGTATTTGCCTATCAGCTACCCGAGATGCCGCGCGAG
+TACATCAGCCAGTTGGTTTTCGACACCAAGCACAAAACACTGGCGCTCATCAAGGAAAACCAACCCATCG
+GCGGCATCTGTTTCCGCCCGTTTCCGTCGCAAGGCTTCACCGAGATCGTTTTCTGCGCGGTCACCATGTC
+GGAACAAGTTAAGGGCTATGGCACGCACTTGATGAACCACTTGAAGGACTACAGCATACAGAGGGGTATC
+AAGCATCTGCTCACATTCGCCGATTGCGATGCCATCGGGTACTTCAAGAAGCAGGGTTTCTCCAAGGACA
+TCAAATTGGCGCGACCAGTTTATGCGGGCTACATCAAGGAGTACGATAGTGCTACTCTCATGCACTGCGA
+GCTGCATCCAAGCATTGTGAACACGCAGTTCATAGCTGTAATTCGCAGTCAGAGCGAGATTCTGAAGGAG
+CTGATCGCACAGCGCCACAACGAGGTGCAGAAAGTAAGACCCGGCTTGACTTGCTTCAAGGAGGGCCTGC
+CCGTGATTCCCGTGGAATCGATTCCTGGTCTGCGGGAGATCGGTTGGAAGCCGCAAAATCGTCCGGCGCG
+CTCATCGCGACCCCTCGAGGAATCCACCGATCCGGAGAAGCTGGCCACGTCCTTTGCATCCGTGCTGCAA
+TCCGTGCGCCAGCACACCACCGCCTGGCCCTTCCTGCGCCCGGTGACCGCGGCCGAAGTTCCGGACTACT
+ACGATCACATTAAATATCCCATGGACCTGAAGACCATGGGCGAGCGCCTGAAGAAGGGTTACTACCAAAC
+GCGCCGCCTGTTCATGGCGGACATGGCGCGCATTTTCTCCAACTGTCGGTTCTACAATTCGCCCGACACC
+GAGTATTATCGGTGTGCCAACTCCCTGGAGCGCTACTTCCAGACCAAGATGCGCGAGCTGGGGCTGTGGG
+ACAAATGAAAGCATTCTAGACCAT
+>KX802381.1_Synthetic_construct_clone_BS30101_Gcn5-RA_(Gcn5)_gene,_complete_cds
+GAAGTTATCAGTCGACATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATCGATGGTAACAAC
+ACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGCTGGTGCATCTGGAG
+CCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGTGCCGGCAGAGGG
+AACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTCTTCAATCTGCCCGTGCCACAG
+AAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCGGCTGGAAGACAC
+CGCAGGAAAATCGCCACCGTGACGTCGAGTCCTCCTACTGTCCGGAGTTCAACGAGGAGTGCCGCAACAC
+CAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTCCAGCATGAACGAG
+CTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGCGTCGAGGACGAGGACACCA
+AGAAGGTGTACCAGTACCTCTTCCGTTTGCTGCGCCAGTGCGTCCTCACGCGCCAGCAGGCGGTCATCCG
+GGGTCCTCTCGGCGATCCACCATTCGAGACGCCGTGCATCACCAAGGCTGTGCTCTCGTTGGTCTTCTAC
+AAGTACAATCACCTGAGCACGCCTGAACTCCAAACCATGACCGAGGTGGCCAAGACGTTCTTGAACTTTC
+TGAACCACTACAACTTTGAATCGCCGTCGACCAGGAGAGGAGATCTCACCCACGAGGATGCCTCTAACTA
+CAAAATCAACTACACCAGGTGGTTGGTGTTCTGCCACGTGCCCGCCTTCTGTAACTCCCTGCGCCAATGT
+GAAACGTCCCTAGTGTTTGGACGAACGCTCCTGCGCACCGTTTTCCAGTGCATGTCGCAGCAGCTGAAAA
+AGAAGTGCATCTCCGAGCGGGATCGCTTTCCCGAGGACAAGCGTTCGATCATCACGCTGATGCCGAAGTT
+TTTGGAAACGCTTCGCGCCGAATTGCTCAAGGATGATTCGCCCATCTGGGATACCAGCTACCGCCCGTCA
+AATTCCTTTGTCATCCAGCAGAGAAAGCGTAACCAGGAGGTGGCAAATGTACCCATTGGTCCCAGTGCAG
+CAAGCATTGGTGGAAACAAGAGGACCAGTGTGGGTGAACCGCTCCACAAGCGAATCAAAAAGGAGCCCAC
+CGATCGTCCAAGCAGTGAGAATTTGGATGACCTGCCTGCGGATGTAGTAATGCGCGCCATGAAATCGGTG
+TCCGAGTCGAAGACCACCAACAAAGCGGAAATTCTCTTTCCGGTTAATGTGTCACGGGATGAGAACGTCA
+AGGCGGAGGAGCAGAAGCGAGCCATTGAGTTCCATGTGGTGGGCAATTCGCTGACCAAACCGGTGGACAA
+GCAGACTGTTTTGTGGCTACTGGGCCTGCAGCTCGTATTTGCCTATCAGCTACCCGAGATGCCGCGCGAG
+TACATCAGCCAGTTGGTTTTCGACACCAAGCACAAAACACTGGCGCTCATCAAGGAAAACCAACCCATCG
+GCGGCATCTGTTTCCGCCCGTTTCCGTCGCAAGGCTTCACCGAGATCGTTTTCTGCGCGGTCACCATGTC
+GGAACAAGTTAAGGGCTATGGCACGCACTTGATGAACCACTTGAAGGACTACAGCATACAGAGGGGTATC
+AAGCATCTGCTCACATTCGCCGATTGCGATGCCATCGGGTACTTCAAGAAGCAGGGTTTCTCCAAGGACA
+TCAAATTGGCGCGACCAGTTTATGCGGGCTACATCAAGGAGTACGATAGTGCTACTCTCATGCACTGCGA
+GCTGCATCCAAGCATTGTGAACACGCAGTTCATAGCTGTAATTCGCAGTCAGAGCGAGATTCTGAAGGAG
+CTGATCGCACAGCGCCACAACGAGGTGCAGAAAGTAAGACCCGGCTTGACTTGCTTCAAGGAGGGCCTGC
+CCGTGATTCCCGTGGAATCGATTCCTGGTCTGCGGGAGATCGGTTGGAAGCCGCAAAATCGTCCGGCGCG
+CTCATCGCGACCCCTCGAGGAATCCACCGATCCGGAGAAGCTGGCCACGTCCTTTGCATCCGTGCTGCAA
+TCCGTGCGCCAGCACACCACCGCCTGGCCCTTCCTGCGCCCGGTGACCGCGGCCGAAGTTCCGGACTACT
+ACGATCACATTAAATATCCCATGGACCTGAAGACCATGGGCGAGCGCCTGAAGAAGGGTTACTACCAAAC
+GCGCCGCCTGTTCATGGCGGACATGGCGCGCATTTTCTCCAACTGTCGGTTCTACAATTCGCCCGACACC
+GAGTATTATCGGTGTGCCAACTCCCTGGAGCGCTACTTCCAGACCAAGATGCGCGAGCTGGGGCTGTGGG
+ACAAATGAAAGCTTTCTAGACCAT
+>AF029776.1_Drosophila_melanogaster_histone_acetyltransferase_GCN5_(Gcn5)_mRNA,_complete_cds
+GCAAGTGGGCAAACTCGTTGTATCGCTTGTGAAATTTAAGGAAAAGTACACTTTGTTCAAAACTAACGGA
+GAGAATCGCAATATCTGCCGATCTTGGAATGTCTGGTGGTCCATCCATAACGATAAAAAGCCAACCAATC
+GATGGTAACAACACGGGCAACGCTGCGGCGCAACAGCAGCAGCAGGCAGCGAATGGCGCAGCAACCGCTG
+GTGCATCTGGAGCCGCTGGCAGCGCCCAGAATCCAGGACACGGAGGAGCTGCGTCCGGCGCTGGCAGTGT
+GCCGGCAGAGGGAACGCGCCAGAACAGCCTACAGCGCATTCAGCAGCGAAAGCAAAAGGTTTTCAATCTG
+CCCGTGCCACAGAAACTGGCCAAATTGTCGATGTATTCCGCCTGCCAGTCTGAGGGATGCCGTTGCACCG
+GATGGAAGACACCGCAGGAAAATCGCCACCGTGACGTTGAGTCCTCCTACTGTCCGGAGTTCAACGAGGA
+GTGCCGCAACACCAGCTGTCGCCATTCGCTGAGATCGCACATAGCCCATCTGGACAATATATCCAGTTCC
+AGCATGAACGAGCTACTGGGCGCCATTATAGACATGGAGAACCTATTCATGTCCATGCAGCGCGTCGAGG
+ACGAGGACACCAAGAAGGTGTACCAGTACCTCTTCCGTTTGCTGCGCCAGTGCGTCCTCACGCGCCAGCA
+GGCGGTCATCCGGGGTCCACTCGGCGATCCACCATTCGAGACGCCGTGCATCACCAAGGCAGTGCTCTCG
+TTGGTCTTCTACAAGTACAATCACCTGAGCACGCCTGAACTCCAAACCATGACCGAGGTGGCCAAGACGT
+TCTTGAACTTTCTGAACCACTACAACTTTGAATCGCCGTCGACCAGGAGAGGAGATCTCACCCACGAGGA
+TGCCTCTAACTACAAAATCAACTACACCAGGTGGTTGGTGTTCTGCCACGTGCCCGCCTTCTGTAACTCC
+CTGCGCCAATGTGAAACGTCCCTAGTGTTTGGACGAACGCTCCTGCGCACCGTTTTCCAGTGCGTGTCGC
+AGCAGCTGAAAAAGAAGTGCATCTCCGAGCGGGATCGATTTCCCGAGGACAAGCGTTCGATTATCACGCT
+GATGCCGAAATTTCTGGAAACGCTTCGCGCCGAATTGCTCAAAGATGATTCGCCCATCTGGGATACCAGC
+TACCGCCCATCAAATTCCTTTGTCATCCAGCAGAGAAAACGTAATCAGGAGGTGGCAAGTGTACCCATTG
+GTCCCAGTGCAGCAAGCATTGGTGGAAACAAGAGGACCAGTGTGGGTGAACCGCTCCACAAGCGAATCAA
+AAAGGAGCCCACCGATCGTCCAAGCAGTGAGAATTTGGATGACCTGCCTGCGGATGTAGTTATGCGCGCC
+ATGAAATCGGTGTCCGAGTCGAAGACCACCAACAAAGCGGAAATTCTCTTTCCGGTTAATGTGTCGCGGG
+ATGAGAACGTCAAGGCGGAGGAGCAGAAGCGAGCCATTGAGTTCCATGTGGTGGGCAACTCGCTGACCAA
+ACCGGTGGACAAGCAGACTGTTTTGTGGCTACTGGGCCTGCAGCTCGTATTTGCCTATCAGCTACCCGAG
+ATGCCGCGCGAGTACATCAGCCAGTTGGTCTTCGACACCAAGCACAAAACACTGGCGCTCATCAAGGAAA
+ACCAACCCATCGGCGGCATCTGCTTCCGCCCGTTTCCGTCGCAGGGATTCACCGAGATCGTTTTCTGCGC
+GGTCACCATGTCGGAACAAGTTAAGGGCTATGGCACGCACTTGATGAACCACTTGAAGGACTACAGCATA
+CAGAGGGGTATCAAGCATCTGCTCACATTCGCCGATTGCGATGCCATCGGGTACTTCAAGAAGCAGGGTT
+TCTCCAAGGACGTCAAATTGGCGCGACCAGTTTATGCGGGCTACATCAAGGAGTACGATAGTGCTACTCT
+CATGCACTGCGAGCTGCATCCAAGCATTGTGAACACGCAGTTCATAGCTGTAATTCGCAGTCAGAGCGAG
+ATTCTGAAGGAGCTGATCGCACAGCGCCACAACGAGGTGCAGAAAGTAAGACCCGGCTTGACTTGCTTCA
+AGGAGGGCCTGCCCGTGATTCCCGTGGAATCGATTCCTGGTCTGCGGGAGATCGGTTGGAAGCCGCAAAA
+TCGTCCGGCGCGCTCATCGCGACCCCTCGAGGAATCCACCGATCCGGAGAAGCTGGCCACGTCCTTTGCA
+TCCGTGCTGCAATCCGTGCGCCAGCACACCACCGCCTGGCCCTTCCTGCGCCCGGTGACCGCGGCAGAAG
+TTCCGGACTACTACGATCACATTAAATATCCCATGGACCTGAAGACCATGGGCGAGCGCCTGAAGAAGGG
+TTACTACCAAACGCGCCGCCTGTTCATGGCGGACATGGCGCGCATTTTCTCCAACTGTCGGTTCTACAAT
+TCGCCCGACACCGAGTATTATCGGTGTGCCAACTCCCTGGAGCGCTACTTCCAGACCAAGATGCGCGAGC
+TGGGGCTGTGGGACAAATGATGCAGTGATTCCGAGGAGCCCTGCATAAGGCCGAACATTTAATCGAACTA
+AAATATTATATTAATTCTATTTATTTTTAGAATTCTCTCCAGCTAAAGAAAAACCCGTATAGTTAGTTTG
+TCGTTATAGATACTTAGGTTAAGATCAGTGCGTCCGTCTTGTTCACAGACGCGTTGTGTCTTTGGGAAGA
+ATAAATCAGATCTATCGCCTACTCATTAACCACGAAAAAAAAAAAAAAA