Next changeset 1:79cb7620843d (2015-10-28) |
Commit message:
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ |
added:
retrieve_fasta_from_NCBI.py retrieve_fasta_from_NCBI.xml test-data/output.fa |
b |
diff -r 000000000000 -r 0bdc5a73c8d1 retrieve_fasta_from_NCBI.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/retrieve_fasta_from_NCBI.py Sun Jun 21 14:29:45 2015 -0400 |
[ |
b'@@ -0,0 +1,279 @@\n+#!/usr/bin/env python\n+# -*- coding: utf-8 -*-\n+"""\n+From a taxonomy ID retrieves all the nucleotide sequences\n+It returns a multiFASTA nuc/prot file\n+\n+Entrez Database UID common name E-utility Database Name\n+Nucleotide GI number nuccore\n+Protein GI number protein\n+\n+Retrieve strategy:\n+\n+esearch to get total number of UIDs (count)\n+esearch to get UIDs in batches\n+loop untile end of UIDs list:\n+ epost to put a batch of UIDs in the history server\n+ efetch to retrieve info from previous post\n+\n+retmax of efetch is 1/10 of declared value from NCBI\n+\n+queries are 1 sec delayed, to satisfy NCBI guidelines (more than what they request)\n+\n+\n+python get_fasta_from_taxon.py -i 1638 -o test.out -d protein\n+python get_fasta_from_taxon.py -i 327045 -o test.out -d nuccore # 556468 UIDs\n+"""\n+import sys\n+import logging\n+import optparse\n+import time\n+import urllib\n+import urllib2\n+import httplib\n+import re\n+class Eutils:\n+\n+ def __init__(self, options, logger):\n+ self.logger = logger\n+ self.base = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"\n+ self.query_string = options.query_string\n+ self.dbname = options.dbname\n+ if options.outname:\n+ self.outname = options.outname\n+ else:\n+ self.outname = \'NCBI_download\' + \'.\' + self.dbname + \'.fasta\'\n+ self.ids = []\n+ self.retmax_esearch = 100000\n+ self.retmax_efetch = 1000\n+ self.count = 0\n+ self.webenv = ""\n+ self.query_key = ""\n+\n+ def retrieve(self):\n+ """ """\n+ self.get_count_value()\n+ self.get_uids_list()\n+ self.get_sequences()\n+\n+ def get_count_value(self):\n+ """\n+ just to retrieve Count (number of UIDs)\n+ Total number of UIDs from the retrieved set to be shown in the XML\n+ output (default=20). By default, ESearch only includes the first 20\n+ UIDs retrieved in the XML output. 
If usehistory is set to \'y\',\n+ the remainder of the retrieved set will be stored on the History server;\n+\n+ http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch\n+ """\n+ self.logger.info("retrieving data from %s" % self.base)\n+ self.logger.info("for Query: %s and database: %s" %\n+ (self.query_string, self.dbname))\n+ querylog = self.esearch(self.dbname, self.query_string, \'\', \'\', "count")\n+ self.logger.debug("Query response:")\n+ for line in querylog:\n+ self.logger.debug(line.rstrip())\n+ if \'</Count>\' in line:\n+ self.count = int(line[line.find(\'<Count>\')+len(\'<Count>\') : line.find(\'</Count>\')])\n+ self.logger.info("Founded %d UIDs" % self.count)\n+\n+ def get_uids_list(self):\n+ """\n+ Increasing retmax allows more of the retrieved UIDs to be included in the XML output,\n+ up to a maximum of 100,000 records.\n+ from http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch\n+ """\n+ retmax = self.retmax_esearch\n+ if (self.count > retmax):\n+ num_batches = (self.count / retmax) + 1\n+ else:\n+ num_batches = 1\n+ self.logger.info("Batch size for esearch action: %d UIDs" % retmax)\n+ self.logger.info("Number of batches for esearch action: %d " % num_batches)\n+ for n in range(num_batches):\n+ querylog = self.esearch(self.dbname, self.query_string, n*retmax, retmax, \'\')\n+ for line in querylog:\n+ if \'<Id>\' in line and \'</Id>\' in line:\n+ uid = (line[line.find(\'<Id>\')+len(\'<Id>\') : line.find(\'</Id>\')])\n+ self.ids.append(uid)\n+ self.logger.info("Retrieved %d UIDs" % len(self.ids))\n+\n+ def esearch(self, db, term, retstart, retmax, rettype):\n+ url = self.base + "esearch.fcgi"\n+ self.logger.debug("url: %s" % url)\n+ values = {\'db\': db,\n+ \'term\': term,\n+ '..b'if db == "nuccore":\n+ badnuc = 0\n+ for nucleotide in fastalines[1]:\n+ if nucleotide not in "ATGC":\n+ badnuc += 1\n+ if float(badnuc)/len(fastalines[1]) > 0.4:\n+ self.logger.info("%s ambiguous nucleotides in %s or download interrupted at this offset | %s" % 
( float(badnuc)/len(fastalines[1]), "|".join(fastalines[0].split("|")[:4]), fastalines[1]) )\n+ self.logger.info("%s download is skipped" % (fastalines[0].split("|")[:4]) )\n+ continue\n+ fastalines[0] = fastalines[0].replace(" ","_")[:100] # remove spaces and trim the header to 100 chars\n+ cleanseq = "\\n".join(fastalines)\n+ sane_seqlist.append(cleanseq)\n+ elif db == "protein":\n+ fastalines[0] = fastalines[0][0:100]\n+ fastalines[0] = fastalines[0].replace(" ", "_")\n+ fastalines[0] = fastalines[0].replace("[", "_")\n+ fastalines[0] = fastalines[0].replace("]", "_")\n+ fastalines[0] = fastalines[0].replace("=", "_")\n+ fastalines[0] = fastalines[0].rstrip("_") # because blast makedb doesn\'t like it \n+ fastalines[0] = re.sub(regex, "_", fastalines[0])\n+ cleanseq = "\\n".join(fastalines)\n+ sane_seqlist.append(cleanseq)\n+ self.logger.info("clean sequences appended: %d" % (len(sane_seqlist) ) )\n+ return "\\n".join(sane_seqlist)\n+\n+ def get_sequences(self):\n+ """\n+ Total number of records from the input set to be retrieved, up to a maximum\n+ of 10,000. 
Optionally, for a large set the value of retstart can be iterated\n+ while holding retmax constant, thereby downloading the entire set in batches\n+ of size retmax.\n+ \n+ http://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.EFetch\n+ \n+ """\n+ batch_size = self.retmax_efetch\n+ count = self.count\n+ uids_list = self.ids\n+ self.logger.info("Batch size for efetch action: %d" % batch_size)\n+ self.logger.info("Number of batches for efetch action: %d" % ((count / batch_size) + 1))\n+ with open(self.outname, \'w\') as out:\n+ for start in range(0, count, batch_size):\n+ end = min(count, start+batch_size)\n+ batch = uids_list[start:end]\n+ self.epost(self.dbname, ",".join(batch))\n+ mfasta = \'\'\n+ while not mfasta:\n+ self.logger.info("retrieving batch %d" % ((start / batch_size) + 1))\n+ mfasta = self.efetch(self.dbname, self.query_key, self.webenv)\n+ out.write(mfasta + \'\\n\')\n+\n+\n+LOG_FORMAT = \'%(asctime)s|%(levelname)-8s|%(message)s\'\n+LOG_DATEFMT = \'%Y-%m-%d %H:%M:%S\'\n+LOG_LEVELS = [\'DEBUG\', \'INFO\', \'WARNING\', \'ERROR\', \'CRITICAL\']\n+\n+\n+def __main__():\n+ """ main function """\n+ parser = optparse.OptionParser(description=\'Retrieve data from NCBI\')\n+ parser.add_option(\'-i\', dest=\'query_string\', help=\'NCBI Query String\')\n+ parser.add_option(\'-o\', dest=\'outname\', help=\'output file name\')\n+ parser.add_option(\'-l\', \'--logfile\', help=\'log file (default=stderr)\')\n+ parser.add_option(\'--loglevel\', choices=LOG_LEVELS, default=\'INFO\', help=\'logging level (default: INFO)\')\n+ parser.add_option(\'-d\', dest=\'dbname\', help=\'database type\')\n+ (options, args) = parser.parse_args()\n+ if len(args) > 0:\n+ parser.error(\'Wrong number of arguments\')\n+ \n+ log_level = getattr(logging, options.loglevel)\n+ kwargs = {\'format\': LOG_FORMAT,\n+ \'datefmt\': LOG_DATEFMT,\n+ \'level\': log_level}\n+ if options.logfile:\n+ kwargs[\'filename\'] = options.logfile\n+ logging.basicConfig(**kwargs)\n+ logger = 
logging.getLogger(\'data_from_NCBI\')\n+ \n+ E = Eutils(options, logger)\n+ E.retrieve()\n+\n+\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 000000000000 -r 0bdc5a73c8d1 retrieve_fasta_from_NCBI.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/retrieve_fasta_from_NCBI.xml Sun Jun 21 14:29:45 2015 -0400 |
[ |
@@ -0,0 +1,64 @@ +<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="0.9.2"> + <description></description> + <command interpreter="python">retrieve_fasta_from_NCBI.py -i "$queryString" -d $dbname -o $outfilename -l $logfile </command> + + <inputs> + <param name="queryString" type="text" size="5x80" area="True" value="txid10239[orgn] NOT txid131567[orgn] AND complete[all] NOT partial[title] NOT phage[title]" label="Query to NCBI in entrez format" help="example: 'Drosophila melanogaster[Organism] AND Gcn5[Title]'"> + <sanitizer> + <valid initial="string.printable"> + <remove value="""/> + <remove value="\"/> + </valid> + <mapping initial="none"> + <add source=""" target="\""/> + <add source="\" target="\\"/> + </mapping> + </sanitizer> + </param> + <param name="dbname" type="select" label="NCBI database"> + <option value="nuccore">Nucleotide</option> + <option value="protein">Protein</option> +<!-- <option value="pubmed">Pubmed (experimental)</option> --> + </param> + </inputs> + <outputs> + <data name="outfilename" format="fasta" label="${tool.name} on ${on_string}: queryString${queryString.value}.${dbname.value_label}.fasta" /> + <data format="txt" name="logfile" label="${tool.name} on ${on_string}: log"/> + </outputs> + <tests> + <test> + <param name="queryString" value="9629650[gi]" /> + <param name="dbname" value="nuccore" /> + <output name="outfilename" ftype="fasta" file="output.fa" /> + <!-- <output name="logfile" ftype="txt" file="log.txt" /> log.txt changes with timestamp. removed to pass the test --> + </test> + </tests> + <help> +**What it does** + +This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database for a given entrez query. + +The tool is preset with "txid10239[orgn] NOT txid131567[orgn] AND complete[all] NOT partial[title] NOT phage[title]" for use with metaVisitor + +See `Entrez help`_ for an explanation of query formats + +Be sure to use the appropriate NCBI query syntax. 
Always use [] to specify the search fields. + +Note that the tool may fail if the connection to the NCBI database is interrupted (see the log dataset) + +**Acknowledgments** + +This Galaxy tool has been adapted from the galaxy tool `get_fasta_from_taxon`_. + +It is Copyright © 2014-2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_. + +.. _Entrez help: http://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options +.. _get_fasta_from_taxon: https://toolshed.g2.bx.psu.edu/view/crs4/get_fasta_from_taxon +.. _CNRS and University Pierre et Marie Curie: http://www.ibps.upmc.fr/en +.. _MIT license: http://opensource.org/licenses/MIT + + </help> + <citations> + <citation type="doi">10.1186/1471-2105-14-73</citation> + </citations> +</tool> |
b |
diff -r 000000000000 -r 0bdc5a73c8d1 test-data/output.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fa Sun Jun 21 14:29:45 2015 -0400 |
b |
b'@@ -0,0 +1,134 @@\n+>gi|9629650|ref|NC_001834.1|_Drosophila_C_virus,_complete_genome\n+TTTATATCGTGTGTACATATAAATATGTACACACGGCTTTTAGGTAGAATATTGTTTTCAATGTTGATTT\n+TAAAGGTAACTTTGGTTATTATGCTTTACGGTTTTCATTGTTGATGGTATTTGTGGCCTGCGGTCCCTAA\n+TTGTTGAATTATTTATTCTGATACGTTGTTTTCATTGTTGATGGTAAGGATTCTTATTTTGAAGTGGTTT\n+TTCAGAAGATAACTCTAAATATGAATTATGCCTTATTGTTTTCAATGTTGATGGCCTTCGTTTAAATACT\n+CTTTGTTAATGACGGTAATCAAAGATTACATCTCAAACTTAGATTAATATTTTTAAGTAGGGTATACTGA\n+GTTAGTCCTCTCTCTTTACTGATTTTGATATCTGGTAATTGACTTCGAAGAAAGATGCGTCTTTTGGATT\n+TGTAATGACTGGGCCTTAAGTTCATAGGTGTTATTACATGGAGGAACACATTACTTTGGTTGATGATGAT\n+GTTTTGATGATGACTTTCAATGTATGTGCTTATGTTAAGCCTGACATAAGAACTTACTAGTTTGCATAAT\n+GCAAAGGGTTAGTATATGATTTTTAGTATGTGGATTTTGACACTGCCTTTGATTAGGATGTGTGAATGAT\n+TTTGAAACATATTAAGATGTTTATACGAGCGTGTTGTTTACTATTTTCAGGATATGTGGAAGCGGTTGTG\n+TATGATCTATACGCACATTTAGTTCCCAGAGGGCGTTGTCGTCTCCCCCTAAGCAAGGGAGAAACACGTG\n+GCACATGATCTTGCGCTTAACGATAAAAATGGAATCTGATAAAAGTATGGCCTGTTTAAATAGAATTTTG\n+ATGAATAAGATGATGTTTGTGGAAGATAAGATCTCTACCCTTAAGATGGTTGCTGATTATTATCAAAAAG\n+AAGTAAAGTATGATTTTGATGCAGTTGAATCTCCCCGTGAGGCACCTGTATTTAGATGTACTTGTCGATT\n+CCTTGGTTATACCATTATGACTCAAGGCATCGGTAAGAAGAATCCGAAACAGGAAGCTGCACGTCAGATG\n+TTGCTCTTGTTATCAGGAGATGTTGAGACTAACCCTGGACCCGTTCAATCGCGCCCCGTGTATTATCGTT\n+ACAACGACCCTAGATATACACGGTTGGAAAAAGCTATTGAACGTCGAGACGATAAAATTAAAACATTAAT\n+TAAAGAGTTGCGTCGACAAATCAAAAATAGGAAAATTTATTCCCAAGGAATGTTTGATAAATTAACTAAA\n+CAAATTTCTGATGGGATAAAAGATGGTGTTGGCTCTGAACAGATGAATGGAAATTTGACTCGTATTTGTG\n+ATTTCCTAGAGAACACTCTTCCTGGGTTACAAGCAAATATTCAAGCCACTGTGATTGATACAACAGACAA\n+ATATGTTTCTTTAAAAGAGGATATTATGAAGATTGTTTTAGTGATATTGCTTGTTCGTCTTTTAATGGTT\n+TGGAAGAAGTATCGTGCTTCTCTGTGTGTTATTTTAATCTTTATTTTTAAATTTTATGGATTCGATCAAA\n+AGTTGATTGATTTAATTATGGATTTGAAGAATAAAATATTTTCACAGGGTGCATTGGAAGATACAGTTGA\n+GGAGGTTGTATATCATCCTTGGTTCCATACGTGTGGAAAAATCATCTTTGCGGTTATGGCTTTCTTAACA\n+ATTAAGAAAATTCCTGGTAAACAGGATTGGGATAGTTACATAACACGTTTAGATCGTATCCCAAAATCTA\n+TTGAGGGAGCTAAAAAGATCACTGATTACTGTTCAGAATATTTTAATATTGCTAATGATCAGATCAAGAT\n+GATGGTTCTTGGA
AAGACTAAAGAAGAATTGCAACGTGCTAATGGACTATATGGAGAAATTCAAGCTTGG\n+GCTCAAGAGGTTCGCCAGTATTTGGAATTGGATCAACGGAATAAAATTGATCTAGATACTGAAACCGCAA\n+ATCGTGTTGAACAACTTTGGATAAAGGGCTTGAAATTCAAGAGTGAACCCCTTTTGAGTAAGGAAATGTC\n+AGCTTTAGTTCATACAACTCTTTTACCAGCTAAGCAATTGTACGAGTATGTATCGTGTTCTCCTGTTAAA\n+GGGGGAGGACCACGTATGCGTCCAATTTGTTTATGGTTGGTAGGTGAATCAGGAGTTGGTAAGACTGAAA\n+TGGTATATCCATTGTGCATTGATGTTCTTCGGGAAATGGGGATGATTAAGAAAGATGATTTTCATCATCA\n+AGTTTATGGTCGTCAAGTTGAAACTGAATTCTGGGATGGTTATAAAGGACAGAAAATTGTCATTTATGAT\n+GATGCATTTCAGAAGAAAGATGACAAAACAGCAGCTAACCCAGAAATTTTTGAGGTTATTCGCTCTTGCA\n+ACACTTTTCCTCAGCATTTACATATGGCAGCTCTTCATGATAAAAATACTTTTTCTGCTGCTGAATTACT\n+CTTATATACCACTAATGATTATAATGTTAAGCTGGAATCTATTACTTTTCCCGATGCTTTCTTTAATCGT\n+ATGGGCGATATGGCTTATAAAGTTAGTCCTAAGAAAGAGTATGGTATTGAAACCGAGAAAGGGAATTCAG\n+GTAAAACTTATTTAAAATTGGATAAGAGTAAATTGGACAAAACAAAAGCTATTGACCTTTCAGTGTATGA\n+ATTCCAAAAAATTGTACGTGACGAGAAAAGTGATGCAGGTTGGATTGATTCTGGATCACCCTTGGACTAT\n+GAAGATTTTGCTAAATTAGTGTGTTCAAAATGGAAAGAAGCGAAACAATCTTCAATGAATAAATTGAAAT\n+TTTTGGAAGAATATGCTATTCGTGCTCAGGTTGGATCAGAAGAAAATTCTGAATATGGTGATTGTATAGA\n+TTTTGTCGATGATATTGCCAAACGCTTACAAAAAGGTGAAACTCTTGAAGAAATAGAGTTTGATTATGCC\n+TCAGATCCAGAGATGTTTACTCAATACTATCATTTTAAATCTACAATTAAACCGGCATCGCGTTGGCAGA\n+AGTATAAGGATCGGATGGACATTTGTTTGAGCGACTGTAAGACTTATTTAGCAAAGAAATACGAAGAAAT\n+TAAGAAAATTCTTGCCGAACATCCTATCTTGACGATTTTAGGAATGATAGGGGTTGCCTTATCTGCTCTG\n+GCAATGTACTATTGGTTTTCTAAATCGTTGGATCCTGTAGAAGCCGAGGTTGCTCCTTCTGGTGACGCTA\n+AAACAGTGCGCTTACCAAGGAAACTCGTTGAGATTGGTGCTTCTGGAGATGTTAAAACACAGAAGATTGT\n+GAAACCCGTTGTAGAGACCGAATGGCATCGTAACAATAAAGGAGAGATTGAAATTTCTTGTGATGAATGT\n+GGTATGCATAGGATGTCTGCATTTAACAATATGACAGATGAAGAATTTGATAACTGTACATATGAAGATT\n+TGAATAAGGACCAGAAACGTGAACTTGCCCAGTGGTCTACTAAAGATTCTTGGTTAGGTCGATTCTTTTT\n+GAGTCGAGATCGCAAGAATAAGGTTGGAATTTGGGCCGAAGTGGGACAATCAGGTGATGTTAAAACAAAT\n+AAAGCTCAGATTAAACGTGTTGAAGCTGGAGCCGAAGAATTAGTTACTGTTGCTTTAACTCAAGGTTGTT\n+CTGATGATGCTGCACACAATTTGATGATTGACGTTTTCCAAAAAAATACATATAGAATGTCATACTTCCG\n+TGGAGACAAGCGTTATCAACTTGGAAATTGTACATTTGTTCG
TGGTTGGTCTTTTATTATGCCATATCAT\n+TTTGTACAGGCTGTGTTTGCGCGAAG'..b'TATTTTATGTAT\n+CTGTCTTGGTTTGTGGTCACATTTAGTTCACTCTGTTCATATTTATGAAGATAATGTATATATGTGGACT\n+CATTCTCAACCTTCTGGCAATCCTTTCACTGTTATTATTAATTGCTTGTATAATTCGATTATTATGCGAC\n+TGTCATGGATTCGTGTGATGGAGAAATTTCAACCTAGACTTAAGTCCATGAAGTGGTTCAACGAATATGT\n+CGCCTTGATAACATATGGTGACGACAATGTTTTAAACATTGATGCAAAGGTTGTGGAATGGTTTAATCAG\n+ATTAACATTAGTGAGGTTATGACTGAAATGCGACATGAATATACGGACGAAGCTAAAACTGGTGATATTG\n+TTAAATCTCGTAAATTAGAAGATATTTTCTTTTTGAAGAGAAAATTTCGTTTTAGCCCAGAATTACAACG\n+CCATGTTGCTCCATTGAAGATCGAAGTTATTTATGAAATGTTGAATTGGTCTCGCCGCTCTATAGATCCA\n+GATGAAATCTTGATGTCGAACATTGAAACGGCTTTTCGTGAAGTAGTTTACCACGGAAAAGAAGAATACG\n+ATAAACTAAGGTCAGCGGTATTGGCGTTGAAGGTACCCCAGGAACTTCCTGAAAACCCTCAGATTTTGAC\n+GTACAACCAATATTTGCACGATATTGAATATCTTGCGGACCCTTTGTACGACTTTTAGTTAAGATGTGAT\n+CTTGCTTCCTTATACAATTTTGAGAGGTTAATAAGAAGGAAGTAGTGCTATCTTAATAATTAGGTTAACT\n+ATTTAGTTTTACTGTTCAGGATGCCTATTGGCAGCCCCATAATATCCAGGACACCCTCTCTGCTTCTTAT\n+ATGATTAGGTTGTCATTTAGAATAAGAAAATAACCTGCTAACTTTCAAACAAATAATAATAACATTGAAA\n+ATGAAGATCGGAAAATTACTTCCGAGCAAAAAGAGATTGTACACTTTTCTAGTGAAGGAGTTACCCCTAG\n+TACCACTGCGGTGCCTGATATCGTTAGTCTTTCAACAGATTATTTGTCTATGACTACTCGTGAAGATCGT\n+ATCCACACGATTAAAGATTTTCTTTCTCGTCCAATTATAATTCAAACTGGTCTTTGGTCTTCCGCTACAA\n+CTGCCGAAACTCAATTGTATACTGCTAATTTCCCTGAAGTGTTCATTTCTAATACTATGTATCAAGAAAA\n+GTTGCGTGGGTTCGTGGGTTTGCGAGCAACTTTAGTCATTAAAGTGCAAGTGAATTCCCAACCTTTCCAG\n+CAAGGACGATTGATGCTACAGTATTATCCGTATGCACAGTATATGCCTAACCGTGTTTCTTTGGTGAATT\n+CCACTCTCCAAGGACGCTCTGGTTGTCCTCGAACAGATTTGGATTTGAGCGTTGGTACGGAAGTTGAAAT\n+GCGAATTCCTTATGTGTCCCCTCATGTATATTACAATCTTATTACTGGACAAGGATCATTTGGCGCTATA\n+TATTTGGTTGTATATAGCCAACTAAGAGATCAAGTTACAGGAACAGGTTCTGTTGAATATACTGTTTGGG\n+CTCATTTGGAAGATGTAGATGTGCAATACCCGACCGGTGCAAACATTTTCACGGGTAGCTCTCCAAATTT\n+TGCCTCTTTGGGTCAGAAAATGAGTGATGGAAAATTCACTGAAAAAGACTTGAGAGATATTTGGACTTCA\n+AAAGCGTACAATAAACAACCAGACAAAATTTTCGCACAAGTGGCTTCTGAAATAACACAACTCAAAGAAT\n+CAGGAACAATTAGTTCTGGAATTGGACAAGTTTCTGAAGGTCTTTCTACCATGTCTAAAATCCCTATACT\n+CGGAAATATGTTTACAAAACCCGCC
TGGATTTCAGCTCAAGTATCTAATATCTTCAAGATGCTTGGTTTT\n+TCAAAACCCACTGTTCAAGGTCTTCCTTGTGAATCGAAACTGCGTGGTCAAGTTCGAATGGCGAATTTTG\n+ATGGCGCTGATACATCACATAAATTGGCTTTGTCTGCCCAAAACGAAATTGAAACAAAATCTGGACTTTC\n+TGGAACTTCTCCTGATGAAATGGATTTATCACACGTCCTTTCCATACCAAATTTTTGGGATCGTTTTACT\n+TGGAACACAACCGATGCCACTAGTTCTATTTTATGGGATAATTATGTTACACCAATGAAAATTAAACCAT\n+ATTCCTCTACAATATTAGATAGATTTAGATGCACTCATATGGGTTTTGTAGCCAACACACACGGTTATTG\n+GTGTGGATCAATAGTTTATACTTTTAAATTTGTTAAGACTCAATTTCATTCTGGACGTTTACGCATTAGT\n+TTTATTCCATTTTATTATAATACGACTATATCTGCAGGAGTTCCCGATGTTTCTCGTACCCAAAAAGTAA\n+TCGTTGATCTGCGCACCTCTACAGAAGTCTCTTTCACTATTCCGTATGTGTCTTCACGACCTTGGATGTA\n+CTGTATTCGTCCTGAAGCTTCGTGGCTTGGAACCGATAATGCTTTGATGTACAACGCCGTTACGGGTATA\n+GTGAGAGTTGAGGTTCTTAACCAGTTGGTTGCCGCTAACAACGTGTTTCAATCTATAGACACTATTGTTG\n+AAGTTAGTGGTGGTCCTGATTTAACTTTTGCAGCACCAATGGCTCCCTCTTATGTTCCTTATTCTGGAGG\n+TTTTACTTTAGCAGATGATGCGGCAGCAAAGAAACAGCGTGAGGAGGAGTATGACAACAACATACCTCAA\n+ACTATTTCTAATCGTGGAAAACGTGAGGTTGAAGATGCTCGTATTGTTGCGCAAGTAATGGGTGAAGATT\n+TAGCTATTCAAAGAAACGATGCTCAACATGGTGTTCATCCAATGACTATAGACACTCATAAGATCGACTC\n+AAATTGGTCTCCGGAAGCGCATTGTATTGGTGAAAAGATTATGTCTATTCGCCAATTGATTAAGCGTTTT\n+GGCATGGCTTTGAACTCCTTGAATTTGATAAGTGATGCACCAAACACCTTGATAGCACCATTTTCAGTTC\n+AGCACCCAACTCCTGTTGTTGCCCCTGCTGAACCCATGTCCCTTTTTGAATATTATTATTTCATTTATGG\n+ATTTTGGAGAGGTGGCATGAGATTTAAACTTCAGGCAGTACGTACAAACTCAGCAGAAACATCAGTTAAA\n+ACCGACACAACTTGGACTGTAAATTTGTGGAATTCTGTACAAGATTCTTTTAATTCTCTAATTAATGTAT\n+TTAGTACTACTGATTACCCTATAAAATCCACAGGAGCACTTCCAGCCGGAACAAGCGGTTTTGGCAATTC\n+GATGACGTATATAGATCCTGAGGTTGAAGGTTTTATGGAATTTGAGATTCCATATTATAATATCTCCCAT\n+ATTTCTCCAGCTACAACCTATGTTCGTGGTACTGAATCTCCTATTACAATTAATAGTGTCTTGCGTGGAC\n+ATTTGCCACCACAAATTGTGGCTGTTGCACCACAGGGCACTATTGCCACTACAGATGTAGTGAACGCTCA\n+ATTTGCTCGTGCTCCTTCTGACGACTTTTCATTTATGTATCTCGTTGGTGTTCCACCACTTACCAACGTC\n+GCTCGTCCCTAACTCCCTTACTATTCTGGATCCTTTAAAATTTATTAGGATAGACAAAAATTAACTCTAT\n+ATTAGATAGTATTAGATTAAGTTTCTTTTTGGTTTTGGGTTTTATTCAGTAACTATCTGCCCTGCTTACA\n+CGGGTATTATTTTTAATTCTTGTCCCTTCTGGACTCTTTTATTTTGTATTTTCA
AAATTTTTACTAATTT\n+TTAGTCAGAGTCCTTAGGGGCTACCAGGTTTTTCGCAATTTTCCTGCTTACTGACAGTAATTGCAATTTC\n+GAATTAAAATAATAGTTGTTTTCT\n' |