Repository 'ncbi_acc_download'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ncbi_acc_download

Changeset 0:1c58de56d587 (2019-12-04)
Next changeset 1:28587613264f (2021-07-20)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_acc_download commit 6747338e8e02cb87c4f3b9cdea0b761f236a02d1"
added:
macros.xml
ncbi_acc_download.xml
test-data/CP011064.fa
test-data/CP021680.fa
test-data/accessions_1.tsv
b
diff -r 000000000000 -r 1c58de56d587 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Dec 04 07:01:37 2019 -0500
b
@@ -0,0 +1,3 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.2.5</token>
+</macros>
b
diff -r 000000000000 -r 1c58de56d587 ncbi_acc_download.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ncbi_acc_download.xml Wed Dec 04 07:01:37 2019 -0500
[
b'@@ -0,0 +1,313 @@\n+<tool id="ncbi_acc_download" name="NCBI Accession Download" version="@TOOL_VERSION@+galaxy0">\n+    <description>Download sequences from GenBank/RefSeq by accession through the NCBI ENTREZ API</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="@TOOL_VERSION@">ncbi-acc-download</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code"><![CDATA[\n+        #if $query_source.select == "accession_file":\n+            { grep -v "^[ \\t]*$" $query_source.accession_file > accessions ||\n+            { echo "No accession numbers in input. Aborting." 1>&2; exit 1; } } &&\n+        #else if $query_source.select == "accession_list":\n+            echo \'$query_source.accession_list\' | sed -r \'s/(\\,|__cn__)/\\n/g\' | grep -v "^[ \\t]*$" > accessions &&\n+        #end if\n+        mkdir outdir &&\n+        cd outdir &&\n+        ignore_errors=$ignore_failed &&\n+        while read accession; do\n+        echo "Downloading accession number: " \\$accession " ..." >> ../error.log &&\n+        ncbi-acc-download\n+            --molecule \'${molecule.select}\'\n+            --format \'${molecule.format}\'\n+            #if $molecule.format != \'featuretable\' and $molecule.format != \'gff3\':\n+                --extended-validation all\n+            #end if\n+            \\${accession};\n+        failure=\\$?;\n+        if [ \\$failure -ne 0 ]; then\n+            echo " failed." >> ../error.log;\n+            if [ \\$ignore_errors -ne 0 ]; then\n+                echo \\$accession >> ../failed.txt;\n+            else\n+                exit 1;\n+            fi;\n+        else\n+            echo " done." >> ../error.log;\n+        fi;\n+        sleep 2;\n+        done < ../accessions 2> >(tee -a ../error.log >&2);\n+    ]]></command>\n+    <inputs>\n+        <conditional name="query_source">\n+            <param name="select" type="select" label="Select source for IDs">\n+                <option value="accession_file">File containing Accessions (one per line)</option>\n+                <option value="accession_list">Direct Entry</option>\n+            </param>\n+            <when value="accession_file">\n+                <param label="Accession File" name="accession_file" type="data" format="txt,tabular"/>\n+            </when>\n+            <when value="accession_list">\n+                <param label="ID List" name="accession_list" type="text" area="true" help="Newline/Comma separated list of IDs">\n+                    <validator type="expression" message="ID list cannot be empty">value.strip()</validator>\n+                </param>\n+            </when>\n+        </conditional>\n+        <conditional name="molecule">\n+            <param name="select" type="select" label="Molecule Type">\n+                <option value="nucleotide" selected="true">Nucleotide</option>\n+                <option value="protein">Protein</option>\n+            </param>\n+            <when value="nucleotide">\n+                <param name="format" type="select" label="File Format">\n+                    <option value="fasta" selected="true">FASTA</option>\n+                    <option value="genbank">GenBank</option>\n+                    <option value="featuretable">Feature Table</option>\n+                    <option value="gff3">GFF3</option>\n+                </param>\n+            </when>\n+            <when value="protein">\n+                <param name="format" type="select" label="File Format">\n+                    <option value="fasta" selected="true">FASTA</option>\n+                </param>\n+            </when>\n+        </conditional>\n+        <param name="ignore_failed" type="select" display="radio"\n+        label="How to handle download failures">\n+            <option value="0">Abort with error on first failure</option>\n+            <option value="1">Add accession to failed list and continue</option>\n+        </param>\n+    </inputs>\n+    <outputs>\n+        <collection name="output" type="list" label="${tool.'..b'            </output_collection>\n+            <output name="failed_accessions">\n+                <assert_contents>\n+                    <has_line line="CP0XXXXX" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test>\n+            <conditional name="molecule">\n+                <param name="select" value="nucleotide"/>\n+                <param name="format" value="fasta"/>\n+            </conditional>\n+            <conditional name="query_source">\n+                <param name="select" value="accession_list" />\n+                <param name="accession_list" value="CP0XXXXX"/>\n+            </conditional>\n+            <param name="ignore_failed" value="1" />\n+            <output name="failed_accessions">\n+                <assert_contents>\n+                    <has_line line="CP0XXXXX" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <test expect_failure="true">\n+            <conditional name="molecule">\n+                <param name="select" value="nucleotide"/>\n+                <param name="format" value="fasta"/>\n+            </conditional>\n+            <conditional name="query_source">\n+                <param name="select" value="accession_list" />\n+                <param name="accession_list" value="CP011064,CP0XXXXX,CP021680"/>\n+            </conditional>\n+            <param name="ignore_failed" value="0" />\n+        </test>\n+        <test>\n+            <conditional name="molecule">\n+                <param name="select" value="nucleotide"/>\n+                <param name="format" value="fasta"/>\n+            </conditional>\n+            <conditional name="query_source">\n+                <param name="select" value="accession_list" />\n+                <param name="accession_list" value="CP011064&#10;CP021680"/>\n+            </conditional>\n+            <output_collection name="output" type="list">\n+                <element name="CP011064" ftype="fasta">\n+                    <assert_contents>\n+                        <has_line line=">CP011064.1 Escherichia coli str. Sanji plasmid pSJ_94, complete sequence" />\n+                    </assert_contents>\n+                </element>\n+                <element name="CP021680" ftype="fasta">\n+                    <assert_contents>\n+                        <has_line line=">CP021680.1 Escherichia coli strain AR_0162 plasmid tig00002623, complete sequence" />\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+        </test>\n+        <test>\n+            <conditional name="molecule">\n+                <param name="select" value="protein"/>\n+                <param name="format" value="fasta"/>\n+            </conditional>\n+            <conditional name="query_source">\n+                <param name="select" value="accession_list" />\n+                <param name="accession_list" value="NP_003192"/>\n+            </conditional>\n+            <output_collection name="output" type="list">\n+                <element name="NP_003192" ftype="fasta">\n+                    <assert_contents>\n+                        <has_line line=">NP_003192.1 transcription factor A, mitochondrial isoform 1 precursor [Homo sapiens]" />\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+**What it does**\n+Given a file containing a list of NCBI accession numbers or a direct entry of accession numbers in the tool text input box, this tool will download the corresponding sequence records via the NCBI API. \n+\n+**Limitations**\n+- For protein sequence downloads, only fasta format is supported\n+- To avoid rate-limits imposed by the NCBI API, records are downloaded sequentially with a delay between requests. This may make it impractical to use this tool to download many (>100) records.\n+\n+**Output**\n+A collection of sequence records in the desired format.\n+    ]]></help>\n+    <citations>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 1c58de56d587 test-data/CP011064.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CP011064.fa Wed Dec 04 07:01:37 2019 -0500
b
b'@@ -0,0 +1,1356 @@\n+>CP011064.1 Escherichia coli str. Sanji plasmid pSJ_94, complete sequence\n+AGATGTCAGGCATATGGATATGTTCAGCCTATGTTATTTTCTCTCTTTCCCCGTGCAGATCCGGTTTCTG\n+GTGGTTCCGGATCCGCGGATCCTTCGCTGCCGCCCGCTGGTCCGCTTTCACAGGATGTGGTGTCCCGCTC\n+CGCTGACATGACACTGTATGTTGTGTTTCTTCCCGTGTTCCCGAGCCAGTCATCAGCGGCAGAAAAGATC\n+GCTTCGCTTATTCACTGATCCTGATTCACTTCCGGAAAATCCTGAATCCGATAAAAATCGGATCATAGAA\n+AAACAGTCATCCTGTAATTTATCATGGTTACTACATTGCAGGTTATTATTGTCATGATTACTTTTCAGGT\n+CAGTTATTGATGGTGGTGTTAATAATAGCTGGATAACAATATTCCGTTGCCGGAGGCATGTTATAGCGTA\n+ATACTGGTGTATACTTAATGGTGTACAGAGAGTATGTAAGATGAGGTGCATTATGAAGCAGCGCATTACA\n+GTCACCGTCGACAGTGACAGCTATCAGTTGCTCAAGGCATATGATGTCAATATCTCCGGGCTGGTAAGCA\n+CCACCCTGCAGAATGAAGCCCGTCGTCTGCGTGCCGAACGCTGGCAGGCAGAAAATCAGGAAGGGATGGC\n+TGAGGTCGCCCGGTTTATTGAAATGAACGGCTCTTTTGCCGACGAGAACAGGGACTGGTGAAATGCAGTT\n+TAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTCCAGAGTGACATTATTGAC\n+ACGCCTGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGTTGTCAGATAAGGTCTCCCGTGAGC\n+TTTACCCGGTGGTGCATGTCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGT\n+CTCCGTTATCGGGGAGGAAGTGGCTGATCTCAGCCACCGAGAAAATGACATCAAAAATGCCATTAACCTG\n+ATGTTCTGGGGAATATAAATGTCAGGCTCCGTTATACACAGCCAGTCTGCAGCCATGGTACCGGCAGTGT\n+ATTCTGCCGGACAGTCTGCACAACTTCCTGTTGCCATTGATTATCCGGCAGCTCTGGCACTCCGCCAGAT\n+GTCGATGGTTCATGATGAACTGCCGAAATATCTGCTGGCTCCCGAGGTGAGTGCCCTGCTTCATTATGTC\n+CCGGATCTGCGCCGCAAGATGCTGCTGGCCACTCTGTGGAACACCGGCGCGCGTATTAATGAAGCACTAG\n+CGCTGACGCGGGGGGATTTTTCGCTCACGCCTCCGTATCCGTTTGTGCAGCTTGCGACCCTGAAACAACG\n+GACCGAAAAAGCTGCCAGGACGGCGGGGAGAATGCCCGCCGGTCAGCAGACTCACCGGCTGGTTCCGCTC\n+TCTGACTCCTGGTACGTCAGCCAGCTGCAGACGATGGTGGCAACACTGAAAATACCCATGGAGCGGCGTA\n+ACCGTCGCACAGGAAGGACAGAGAAAGCGCGGATCTGGGAAGTGACGGACAGAACGGTCAGGACCTGGAT\n+TGGGGAGGCGGTTGCCGCCGCTGCTGCTGACGGTGTGACGTTCTCTGTTCCGGTCACACCACATACGTTC\n+CGCCATTCCTATGCGATGCACATGCTGTATGCCGGTATACCGCTGAAAGTTCTGCAAAGCCTGATGGGAC\n+ATAAGTCCATCAGTTCAACGGAAGTCTACACGAAGGTTTTTGCGCTGGATGTGGCTGCCCGGCACCGGGT\n+GCAGTTTGCGATGCCGGAGTCTGATGCGGTTGCGATGCTGAAACAATTATCCTGAGAATAAATGCCTTGG\n+CCTTTATATGGAAATGTGGAACTGAGTGGATATGCTGTTTTTGTCTGTTAAACAGAGAAGCTGGCTGTTA\n+TCCACTGAGAAGCGAACGAGACAGTCGGGAAAATCTCCCATTATCGTAGAGATCCGCATTATTAATCTCA\n+GGAGCCTGTGTAGCGTTTATAGGAAGTAGTGTTCTGTCATGATGCCTGCAAGCGGTAACGAAAACGATTT\n+GAATATGCCTTCAGGAACAATAGAAATCTTCGTGCGGTGTTACGTTGAAGTGGAGCGGATTATGTCAGCA\n+ATGGACAGAACAACCTAATGAACACAGAACCATGATGTTTGGTCTGTCCTTTTACAGCCAGTAGTGCTCG\n+CCGCAGTCGAGCGACAGGGCGAAGCCCTCGAGTGAGCGAGGAAGCACCAGGGAACAGCACTTATATATTC\n+TGCTTACACACGATGCCTGAAAAAACTTCCCTTGGGGTTATCCACTTATCCACGGGGATATTTTTATAAT\n+TATTTTTTTTATAGTTTTTAGATCTTCTTTTTTAGAGCGCCTTGTAGGCCTTTATCCATGCTGGTTCTAG\n+AGAAGGTGTTGTGACAAATTGCCCTTAACCCTGTGACAAATTGCCCTCAGAAGAAGCTGTTTTTTCGCAA\n+AGTTATCCCTGCTTATTGACTCTTTTTTATTTAGTGTGACAATCTAAAAACTTGTCACACTTCACATGGA\n+TCTGTCATGGCGGAAACAGCGGTTATCAATCACAAGAAACGTAAAAATAGCCCGCGAATCGTCCAGTCAA\n+ACGACCTCACTGAGGCGGCATATAGTCTCTCCCGGGATCAAAAACGTATGCTGTATCTGTTCGTTGACCA\n+GATCAGAAAATCTGATGGCACCCTACAGGAACATGACGGTATCTGCGAGATCCATGTTGCTAAATATGCT\n+GAAATATTCGGATTGACCTCTGCGGAAGCCAGTAAGGATATACGGCAGGCATTGAAGAGTTTCGCGGGGA\n+AGGAAGTGGTTTTTTATCGCCCTGAAGAGGATGCCGGCGATGAAAAAGGCTATGAATCTTTTCCCTGGTT\n+TATCAAACGTGCGCACAGTCCATCCAGAGGGCTTTACAGTGTACATATCAACCCATATCTCATTCCCTTC\n+TTTATCGGGTTACAGAACCGGTTTACGCAGTTTCGGCTTAGTGAAACAAAAGAAATCACCAATCCGTATG\n+CCATGCGTTTATACGAATCCCTGTGTCAGTATCGTAAGCCGGATGGCTCAGGCATCGTCTCTCTGAAAAT\n+CGACTGGATCATAGAGCGTTACCAGCTGCCTCAAAGTTACCAGCGTATGCCTGACTTCCGCCGCCGCTTC\n+CTGCAGGTCTGTGTTAATGAGATCAACAGCAGAACTCCAATGCGCCTCTCATACATTGAGAAAAAGAAAG\n+GCCGCCAGACGACTCATATCGTATTTTCCTTCCGCGATATCACTTCCATGACGACAGGATAGTCTGAGGG\n+TTATCTGTCACAGATTTGAGGGTGGTTCGTCACATTTATTCTGACCTACTGAGGGTAATTTGTCACAGTT\n+TTGCTGTTTCCTTCAGCCTGCATGGATTTTCTCATACTTTTTGAACTGTAATTTTTAAGGAAGCCAAATT\n+TGAGGGCAGTTTGTCACAGTTGATTTCCTTCTCTTTCCCTTCGTCATGTGACCTGATATCGGGGGTTAGT\n+TCGTCATCATTGATGAGGGTTGATTATCACAGTTTATTACTCTGAATTGGCTATCCGCGTGTGTACCTCT\n+ACCTGGAGTTTTTCCCACGGTGGATATTTCTTCTTGCGCTGAGCGTAAGAGCTATCTGACAGAACAGTTC\n+TTCTTTGCTTCCTCGCCAGTTCGCTCGCTATGCTCGGTTACACGGCTGCGGCGAGCGCTAGTGATAATAA\n+GTGACTGAGGTATGTGCTCTTCTTATCTCCTTTTGTAGTGTTGCTCTTATTTTAAACAACTTTGCGGTTT\n+TTTGATGACTTTGCGA'..b'GCGTCTTATGAGCCTGCTGTCACCCTTTGACG\n+TGGTGATATGGATGACGGATGGCTGGCCGCTGTATGAATCCCGCCTGAAGGGAAAGCTGCACGTAATCAG\n+CAAGCGATATACGCAGCGAATTGAGCGGCATAACCTGAATCTGAGGCAGCACCTGGCACGGCTGGGACGG\n+AAGTCGCTGTCGTTCTCAAAATCGGTGGAGCTGCATGACAAAGTCATCGGGCATTATCTGAACATAAAAC\n+ACTATCAATAAGTTGGAGTCATTACCGCGCTTCCTGGGGGGTTAATGGCTGATGACGATAAAATCATATA\n+TCGAATGATTTATTTCTCTGCCTGTAAATATCAGGCCAGGATTTATTATGCGATTAATGATAAATATCAT\n+ATCATCTGCGGGTTTCCCTCCACGCAGGTCCGGTTTCCGGCGAAACCGGAGCCTCATTTTTCGGCCTGAA\n+GTTTTGTCGTTGACTACTTTTTACCGGCTTGTGCCCAGTTGAGAAGCCGCTGACTTAGTTTCTGAATCTC\n+CGAAGATGGCTTTCCGGAATAAAATAGATGTTGCCCCGTGCCCGTGAGCAGGCCACGTATAGCCTGTTAC\n+GGGATGATGGTGGAAGTGCTGCCAGCTTCTGTTGGATGAGGCGCATCCAGTGGTTGGCCCCCATCACGAT\n+GCAGACGTCCTGAAAGTGGTCGAGTCCTTTACTGCCCCCCCAGTTCATTGAGTGGCATCCGTAACGGTGA\n+TGTTCGCGATAGAAGAGCTTGATCATCGTGTTGTCCGCGTGCAGGGCCGCTGAACGCGCTTCATCCGTGA\n+CGATTTCAATCTGGGTGGTATGTGTTCCATGAGCCGAAATGCGAATGTTCAGTTGTCCGGTAATGAACTC\n+ACACACTGTAGCTGAACAGCGCCATGTCCGGCTGAGCGTCTCACAATCCACCATGATACCCGCAGCCCTG\n+AAGCGAGCCTCGTATCGGGTTATGTCTTCATGCAGCGTAGCATTGACGTTACCGTCGCGGCTTGTGTCAA\n+AGGTATGCTGATAAAAATCCCCGCAGCACAGCACTGAGATTTCCGCGCGGCATAGCTCCAGCAGAAAATT\n+AAAGTCATGGCCGGCAAAGTCCTGGACTTCATCAACGAACAACTCATCGTAGTAACGGGCAAGTCGGGTA\n+CGGATATCTGGCAGCAACCCCCGGGCTGTCAGCAGGTGTGCGAGCCGCCGGTGATACAGCCGTCCGGCGG\n+GATCCTGATAATGCCGGATATTCGTGCGCGGTATTCTGGAAGGGGGCTGGTTGAAGCTCAGACCACGCGA\n+TGACAGTTGCTCCTGCAGGAAAGGGCGAAAACAGAAACCGTGCAGAAACTCAAACCAGGTCATCACACGG\n+ATCCCGTACGGTATGTAGCCAAAGCGTCTGATAATCTGCGCCCGCAGATGTGCTTCATTATTTACGGTGA\n+AGGTCAGGATCAGCGTCCGCCGGTCCTCTCTTAACCGACGTATCAGCAGCGTGGTTTTACCCGAGCCGGC\n+AACCGCAAATATCACTCTCTTATCCATGCCAGTGCCTCCTGGATATAATCAGGAACGGTCAGCTTTTCGG\n+CGTGCAACTGTAACAGCTGGAATGCTGCTTCTGCCTTATTGGCCAGCATGTAATCCTGAACCGTGAGTGT\n+CCGGCGGGTTCCGCGGAAAAGCGCATCACACAGGTCGGCATTATCCTGATACAGGCAGATTTCAAATGTG\n+GAGCGGCTGTTGTCGTGGTCAGCAAATACCCGGGAGCGGGAACAAAGCACGTCAGCATAGCGTTCATCGC\n+AATTTTGCTGATAGTTGCCGTCATTGTCGCGAAGGGCCGCGACACGATTTTCCAGAAGACGGGCCAGTTC\n+AAGATAACGACGGAAACTCGTTCCGCCGATAGCGATGATGTGAACCCCGTCATCCTCCGGTGCCCTGCCA\n+TACAGGCGGTGATAAAAGGCCTCGATCAGAATAAACTCAGCATCCCCCTCCACGAGCAGCACGCGTCTGG\n+CCAGCGCAAATTCCAGCACGTTATTATCCGGTGCTTTCATGAAGAAGGCGGCAGTCTCAGCGGAAAGTTC\n+ATTCATCAGTACCGGCCGGGTAGCTCCGAGGAGAATAGCCTTACGTAGGTCGAGGCGGGAGGAGATATGG\n+CTACTGTGTGTGGCAATAAACACCTGCGTCTGCCGTTCCGTGGCAAGCTGATTAACCAGCCTCTTCATAC\n+TGACATGGCTGAGATGATTCTCCGGCTCCTCAAGCAACAGCGCGTGGATCTCTCCCCCCTGCTGCTGATG\n+CCGCTGCAGCGCAAACTCCGTTTTGATGAAACACTGTCTTCCTTTTCCGCGGTGACGAATGGAAATACCG\n+TCCTCAGTAATGTCCAGGTTCGCCTCAAGACCTGATTTAGCGCCGGAACGCACGCCAAACTGATACGTTT\n+TCAGCGTGTCGTTTATGGCGGACAGATGCCGGGCGCAGAAGTGCATTTTCTGTTGCCGGTAACTGTTTTC\n+AAGCCGGTACCGATCGGCCACCGGCACATTGACGCTGTAAACTGTGCGCGTATACTCCTGTGCGGCATGC\n+TCGTTATCAATGCGTGCACTGTCTAAGAACAGATGGCGCAGATAGCGTCTGAAGCCTGCGAAATGACCAC\n+CGGAGAATGTGCTGAACTGCACGGAATAGTATTCATAGGGAAAATTATCAGGATCCTGCTGCAAAACATG\n+GTGTATATCCTGACCGTATTCCTCCATCATGGGAGCAATGCGCATTCTGAGACCATCCGCATCGATACCC\n+GCCAGATTTTGCCTGCCGTTCAGGTCAGGCTCTCCCCCATTACTCAGGAAAACGTCAGCGGTCAGCACGG\n+GTAACTGATCGGCACGGCGCTCACCTTCCTGAAAATGCCTGACTGCTGACTGTGACAGGAGCGATTCCAC\n+CCCCAGCGCCTCAACCCGATGCCTGCTGTCACTCAGCACAAGGTCAAGAGCCAGCAGAATAGTGCTTTTC\n+CCGGACTCATTGTCCCCAACCAGAATGTTACGATCGTTGCTGAAGCGCAGGTCCAGTTCCGGAAACTTTT\n+TGAAGTTTTGCAGCATAAGCCGCGTGATCAGTGGCATTTTTTTATTCCTTTATATACCGCTGCCTGAAAG\n+AGCGGCCGGCCGCGCCTGACCGCCTGTTTTGTTTTACAGCGTTATCTTACCCAGTCTTCCAGCACAAGAC\n+CCGGTACCCGCTCAAACTCTCTCACATTATTCGTCACCAGAATGGCACAGGCAGCGATGGCGTGTCCGGC\n+TATCGCCGTGTCGTTCGGACCGATCGGCGTGCCGGCGAGACGCAGTGCCACCTTAATCTCCGTGGTCGCA\n+TCCACCGCGGCGCGGTCCCACGGCAGGACGGCATCGAGCCTCTCACAGAACGCGTCAACCAGTTGCACGT\n+GGCGTGGCGAGGCCTTCGGACCGGTGGCACCGAAGCGCATCTCGGAGTAGGTGATGGCCGAGACCACGAT\n+ACGGTGACCGCGCAGCACCGACTGCTCCAGGTGCTTAAGCAATGCTTCAGGTTGCTCGCGCATGATGAAC\n+GAGCAGATGTTCGTGTCGAGCATATAGATCTTGTTCACAGGTTAACTCGTCCTTCGTCGCTGACAACGTC\n+CTCGCGCTCCGTCATAAAGTCCGGATCGGCCTTTTCTAGCTCCAGGAATGAGCCCCAGGTCGGCCGGACG\n+GGGCGCAGAATGATGCTGTCCCCTTCCCGGACGATCTCCAGCTCGCTCACCCCCTCAAAATCCAGATCGC\n+GTGGAAGGCGGATAGCACGGTTGTTGCCATTTTTAAAAATGGATACGGTTCTCATGATTCTCTCCCGGGT\n+AAGTGTGGATATGTGATGCATATCCGGAATGATGTTCATTGCACCGGGTGTGTGCATATGTTTAGTATAG\n+GC\n+\n'
b
diff -r 000000000000 -r 1c58de56d587 test-data/CP021680.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/CP021680.fa Wed Dec 04 07:01:37 2019 -0500
b
b'@@ -0,0 +1,336 @@\n+>CP021680.1 Escherichia coli strain AR_0162 plasmid tig00002623, complete sequence\n+TTCTTGTCGCCTTTGCCGCCGTATTCCCGGATGTCGCCGACAAAAATTTTAAACTACTTCAGCAACAAAT\n+CTTCACGAATGCCAGCTGGTTCTACATCCTTGCTGTGGCCCTGATTTTACTGAGTGTCACGTTCCTTGGA\n+CTCTCACGCTACGGTGATATCAAGCTGGGCCCGGACCATGCGCAGCCTGATTTCAGCTACCACTCCTGGT\n+TTGCGATGCTTTTTTCGGCAGGGATGGGGATCGGCCTGATGTTCTTTGGCGTTGCCGAACCTGTAATGCA\n+TTATCTTTCGCCACCCGTTGGCACTCCAGAAACCGTTGCGGCAGCTAAGGAAGCAATGCGTCTGACCTTT\n+TTCCACTGGGGACTGCACGCATGGGCAATTTATGCCATTGTGGCGCTGATTCTGGCGTTCTTCAGTTACC\n+GTCACGGTCTGCCTTTAACTCTGCGCTCCGCACTCTATCCCATTATTGGCGATCGCATATACGGACCTGT\n+AGGACATGCGGTTGATATTTTCGCTGTTATAGGCACGGTCTTTGGCGTTGCGACATCACTGGGTTACGGT\n+GTTTTGCAGGTGAATGCCGGTTTGAACCATCTTTTCGGGGTGCCCATCAATGAAACGGTGCAGGTAATTC\n+TGATCGTGGTCATCACGGGGTTAGCGACGATTTCAGTGGTGTCCGGTCTGGATAAGGGAATACGTATCCT\n+GTCTGAACTCAATCTGGGTCTGGCTTTGTTGCTCCTGGCGCTGGTCCTGTGTCTGGGACCAACCGTGCTT\n+CTGCTGAAGTCATTTGTGGAAAATACGGGCGGTTATCTTTCGGAACTGGTGAGTAAAACGTTCAACCTTT\n+ACGCGTATGAGCCCAAGTCGAGCAACTGGCTGGGGGGCTGGACATTACTGTACTGGGGATGGTGGCTTTC\n+ATGGTCGCCGTTTGTGGGGATGTTCATCGCACGGGTCTCCCGCGGGCGAACCATTCGCGAGTTTGTCACC\n+GGCGTGCTGTTTGTTCCCGCGGGTTTTACGCTAATGTGGATGACGGTGTTTGGTAACAGCGCGATCTATC\n+TCATTATGAACCAGGGGGCCACAGACCTCGCCAATACTGTTCAGCAGGATGTGGCGCTGGCCCTGTTTAA\n+TTTCCTGGAGCATTTCCCGTTCTCTTCTGTGCTGTCATTCATTGCAATGGCGATGGTCATCGTCTTCTTT\n+GTAACGTCTGCTGATTCGGGGGCAATGGTTGTGGATACTCTGGCATCAGGTGGAGTGGCAAACACACCCG\n+TCTGGCAGCGAATATTCTGGGCCTCGCTCATGGGCATTGTTGCAATTGCGCTTCTCCTTGCCGGAGGGCT\n+AAGTGCGCTGCAAACGGTGACAATAGCGAGTGCATTGCCCTTCTCAGTGATCTTACTAATATCTATATAC\n+GGACTTTTAAAAGCTTTGCGCCGGGATTTGACCAAGCGTGAAAGCCTGAGCATGGCGACAATTGCTCCTA\n+CGGCTGCACGTAACCCAATTCCTTGGCAGAGAAGGTTACGCAATATCGCGTATCTGCCGAAGCGATCTCT\n+TGTGAAACGTTTTATGGACGACGTTATCCAGCCTGCCATGACGCTGGTTCAGGAGGAACTGAACAAGCAG\n+GGGACGATAAGCCACATTAGTGATGCAGTCGACGATCGTATTCGTCTTGAAGTCGATTTGGGCAACGAGC\n+TGAATTTTATATATGAAGTGAGGCTTCGCGGGTATATCTCACCGACCTTCGCGCTCGCCGCAATGGATAA\n+TGATGAGCAGCAGACTGAACAACATCGATATTATCGCGCTGAGGTTTATCTCAAAGAAGGCGGTCAAAAT\n+TATGATGTGATGGGCTGGAACCAGGAACAGCTGATTAATGACATACTGGACCAGTACGAAAAACACCTGC\n+ACTTCCTGCACCTGGTTCGTTAATAGCAACATGCCGTCCTGGGGGCGGCAATTATTATCCCGGCCGCAAT\n+ATGAGGGAATGCAGAATGATTTCACGCTGGAAATGGATGCTGAAGCAGACAATTAAAAAACTATGGTTCA\n+GGGCAACGTTATTCGCAATTGTCGCGATAATAACGGCCCTTTTATCAATTCTTTTTAAATCAATGATACC\n+TGAGTCGGTTTCCGTGAAGGTTGGTGCGGAAGCAGTCGATAACATTCTGAACATACTGGCATCGAGTATG\n+CTGGCAGTGACCACATTTTCGCTGAGTATCATGGTCACAGCCTACGGTTCAGCCACTACTAATGTGACTC\n+CCAGAGCTACGCGTTTAGTTGTTGAAGACGTCACCACACAAAATGTACTGGCCACCTTCATCGGTTCTTT\n+TCTCTTCAGTCTGGTAGGGATTATTGCCCTCAATATGGGAGCTTATGGAGAAAGGGGGAGAGTCATTTTA\n+TTCATTGTCACACTGGTTGTCATTGCCTTAATCCTCATCACATTGCTTCGCTGGATACAGCATTTGACCT\n+CTCTGGGGAGGGTTGGTGAGACAACGGCACAAGTAGAACAGGCGGCCATCGAAACATTTATTGCGAGAGC\n+AAGAAATCCCTGTCTCGGTGGATATCCATGGCTTGAGAACAATGAACAGCCGAAAGGAACGGTTGCAGTT\n+TATCCGAAGAAGATTGGCTATGTTGAATATATTGATATGGTGAAACTTAGCAAGCTGCTGACCAATGATC\n+CCCGTCATGTATACCTCGTGGCGCAGCCAGGCAGTTTCATACATCCGTCCATGCCAGTTTTGTACCTGAG\n+TCAGGGCCAGGAGTCGTCAATCAACACCGATTTACTTGAGACGATTATTGTCTCGGATGCACGTTCATTT\n+GCTCAGGATCCTCGATTTTGTCTTAGCGTCATGGCCGAAATAGCCTGCCGGGCCCTTTCCCCTGCAGTGA\n+ACGATCCTGGAACCGCCATTGATGTCATTGGCAGAGGTGTTCGTATACTTTCCACTTACGCGCAGAATAA\n+ATCTGATGAAATAGAAGTGAAATATCCTTCAGTACATGTTGCACCACTTCAGAATAACGATCTACTGGAA\n+GACTTTTTCTCACCTGTCGCGCGCGATGGTGCCAGTATGAGGGAGATTCAGATAAGAGTCCTTAAAGGAC\n+TGTCGATGCTGAGTAAAGGTTGGCCTGAGATATTTGCTGAAGCCGCACATACCCTGGCATTTGAAACATT\n+AGAGCATGCAACTCGTGCTGACCATATAGATTCTGATAGATATCTAATAAAATCAACTTATTATAATTTA\n+TTTAGTGGCGAATATTCTAATAAAAAAACATAGCTGCAGAGCAAGCAATTCGGTTGTTGGCAAGTCAAGG\n+GCAAAGCATTCGCGTGAGAATGGGCGTATGCTTTGTGCCAAAAGCAAACGTTGCTAATTCTGCACTGTTC\n+AGCGATGGCGCAGTAGAGACAGGCACTGCCACTGTCCAGTCGGCAAAAGCGAACACTTTGCCACTGCGGG\n+GATAAACTGATTGAAGCTCTCGCGCAAAATCACTTATTCTTTTGCGTTAATAAAATGTAGGAGATGGGTC\n+ATTGATCCTACCCACGTAATATGGACACAGGCCTAAGCGAGGTTCTTGTTTTCAAATTGTTCCGGACTGA\n+GGCCGCCACACCAACTGTGCCGCCGCCACCGATTGTAATCACATTCGATATAATTAAACACCGTTGCCCG\n+CATTATTTCCCGGCTGATAAAGTGTTCTCCATGGATACATTCCACTTTCAGCGAATGAAAGAAGCTTTCC\n+ACGCAGGCATTATCGTAGCAGCAACCTTTTGCGCTCATACTTCCACGCAGATTATGCCGCTTCAGTTGCG\n+CCTGATAA'..b'ACAGTATCAGAT\n+TGACAGTATGGGCATGCATGATGATGTTTTTATTGCTGACAATGTTTTTCCTGCCGCCCCCGTATATCGG\n+GTTGCCAGTCTGGTGGTTCTGCCTTCAGAAACGAATCTTTTGGTATGGTGCTGGCAGAAGCATCGGCATT\n+TTCTGTGCCTGTAGTGGCCACTCAGATTGGTGGAATCCCTGAGGTTATTCAGAACAACCAGACCGGGACA\n+TTGTTACCAGCAAGTAATAAGCACGCATGGATGTGCGCCCTGAATGATTTTTTAATGACCCTGGGCGTTT\n+TTATCAGATGGCTCGCCTGGCAAAACAGGATATAGAAGAGCGGTTTGATATTAATAAAACTGCGTTAAAA\n+TACTCACATTAGCGAAGCAAAGTACAATATGTTTCTATAAGTAACTTATCATTTATCGGTTTCCTTAAAA\n+GGATTATTTTTTCCTCAGATTCCTACCGGGGAGTGGAACACAGAAATTTCGGTTCATGTACATTTTGCGA\n+TGTTCTATAAATCCGGTTGTCAGTATTCGATATTATATGAACTGCGTTCCTTGCCGTGCATTGAGGATTC\n+TGGCATACACCCCACGTTGCCAGCAGAATTCATCGCCCTTATTTGCATAAGGGGGGCGCGCCTGGACCGT\n+GGACAATATATTCTTGAGCATTATCGCTTTGTGCAAAATTTGCCAGAAAAATATTCTAATTTTTGTTTCC\n+ACAAAATCGGTATCATTAGTCCATTCATCGGGAAGATGGAGAGAATTTTGATATCCAGTGTTCTCCCAGT\n+GGTTTTGACCGGGAAGGAGAGTTGATGTTGTCATTGTTTACAATAAAACAGTTATTGCCCGGTTAACATT\n+TTCAGTCATTCTGACTCAAATGGACACATTGCTTTTATTGGCGGATTACAGGGAGCCCTAAAAATACCGG\n+ACTGATGTTATTCGGTGCGCAACCCGGGCTGTTATGGGATTTTCCCAAACGGATTATTTTGAAGCTTTTG\n+TGCGTTGATGAAAGCTTGCAATATCTCTGAGTGTCTGGCAGTCAGTGAGCACAGTCATGTTTTCCGGCAA\n+TTGAGATACTGGTATCAGAAACGCAAAACCTTTGTTGCGGTCTATAGTGATTTCTGGGAGTCTGTAGCAG\n+GAAAAACCTGTGGTGACTGGTACAGATTACCAACCCAGGTGATTCGAAAGCCGCTAAGCGATATAGCCAG\n+TAAAAAACGCTCTGAGTACCGAAAACGATACGCTTTGCTGGATTATATCCATGAAACTACTATCAGCTCT\n+TTGGATGCATATCCTGTACACTCAGAACATCAGAATTTAAATTAAGGTTTGATGTATTCTCTGAATAAAA\n+CAGTAATGATAATTTTGGTAGAGGCATTCGCACTAAATAATGAAAAAATATAAATCTGAGTTTATTCCTG\n+AATTTAAGAAAAATTACCTTTCCCCTGTTTACTGGTCTACATGGTTCCTTTTGGGAATGATTGCAGGTAT\n+TTCAATGTTTCCCCCTTCATTCAGAGATCCTGTCTTGGCAAAAATAGGGCGTTGGGCGGGCAGATTGAGC\n+AAAAAAGCTCGTCGCAGGGCGACGATTAATTTATCGCTTTGTTTCCCAGAAAAGAGTGATACAGAACGGG\n+AAATAATTGTCGACAAAATGTTTATCACAGCATTGCAATCCATAGTGATGATGGCAGAACTGGCAATTCG\n+TGGTCCGGAAAAGTTCCAGAAACGAGTGTTCTGGAAAGGCCTCGAAATTCTTGAGGAAATTCGACACAAT\n+AATAGAAATGTGATTTTTCTGGTTCCCCATGGCTGGAGCGTGGATATTCCTGCAATGTTGCTGGCAGCTC\n+AGGGAAAAAAAATGGCCGCCATGTTTCATCAGCAACGAAATCCAGTGATTGATTATGTCTGGAATTCAGT\n+ACGGCGTAAATTCGGGGGGCGCTTACATGCCCGGGAGGATGGGATAAAACCATTTATTCAGTCAGTACGC\n+CAGGGATACTGGGGATATTACCTCCCAGATCAGGATCATGGTCCTGAATACAGTGAATTTGCTGATTTTT\n+TTGCGACCTATAAAGCGACATTACCAATCATTGGACGTCTGATGAACATCAGTCAGGCTATGATTATACC\n+ACTTTTCCCGGTTTATGATGAAAAAAAACATCTACTGACTATTGAAATTCGGCCACCAATGGATGCTTGC\n+ATTGCCAGCGCGGACAATAAAACGATTGCCCGACAAATGAACAAAACAGTGGAAATTTTGGTGGGGCCAC\n+ATCCGGAACAGTATGTCTGGGTTTTAAAATTGTTAAAAACACGCAAATCAAACGAAGCGGACCCGTACCC\n+TTGAAATTATAAAGTACAAGTATTTCCACTAGTTGTTTGTAAGCGTACAGCCTGAACCGTCTGGTCAGAA\n+TCTGACGAATTAGACAAAGTGGTGTCCACCAAATAAGTAGTGGGAACCAATGAGGTAGCCTGAGTTTAAC\n+GGACACTCCTTCCTGAAATAGAATGGCATCAGAAGGAGCTAATAATGAGCAGAAAAACCCAACGTTACTC\n+TAAAGAGTTCAAAGCCGAAGCTGTCAGAACGGTTCTTGAAAATCAACTTTCGATCAGTGAAGGCGCTTCC\n+CGATTATCTCTTCCTGAAGGCACTTTAGGACAATGGGTTACCGCCGCCAGAAAAGGGCTCGGTACTCCTG\n+GTTCCCGCACGGTGGCTGAACTGGAATCTGAAATTCTGCAACTGCGTAAGGCGTTAAATGAAGCTCGCCT\n+TGAGCGAGATATATTAAATTGCACAGGAGTCGCTGAAAAATACGCGTTAATCGAACAATGGCGACAACAA\n+TTTCCCATTGAAGCGATGTGTCAGGTATTTGGTGTATCCAGGAGCGGTTATTACAACTGGGTACAGCATG\n+AACCCTCAGACAGAAAACAAAGTGATGAGCGGCTAAAACTGGAGATTAAGGTGGCACATATCCGCACTCG\n+CGAAACATATGGAACCCGGCGGCTCCAGACGGAGCTGGCAGAGAATGGCATCATCGTTGGTCGTGACCGA\n+CTGGCACGTCTTCGTAAGGAGCTAAGGCTACGCTGTAAGCAGAAACGCAAGTTCAGAGCGACTACGAACT\n+CGAACCACAATCTGCCAGTTGCGCCAAATCTGCTGAACCAGACGTTCGCTCCTACAGCACCAAATCAGGT\n+CTGGGTGGCGGACCTGACGTATGTTGCCACACAGGAGGGATGGTTGTACCTCGCTGGCATCAAAGATGTT\n+TATACGTGCGAAATTGTCGGCTACGCCATGGGAGAGCGCATGACAAAAGAGCTGACAGGTAAAGCCCTGT\n+TTATGGCGCTCAGGAGCCAGCGCCCACCTGCCGGGCTAATCCACCACTCTGATCGAGGTTCACAGTACTG\n+CGCATACGATTACCGGGTCATACAGGAGCAGTTTGGTCTGAAAACATCAATGTCGCGTAAAGGTAACTGT\n+TACGACAACGCTCCGATGGAAAGCTTCTGGGGAACGCTGAAAAATGAGAGCCTGAGCCACTATCGTTTTA\n+ATAACCGGGATGAAGCCATCTCAGTAATACGGGAATACATTGAGATTTTCTACAATCGTCAGCGTCGTCA\n+CTCTCGTCTGGGGAATATCTCCCCGGCAGCCTTCAGGGAAAAATATCATCAGATGGCTGCTTAAAAAAAG\n+AACAAATGGTAGTGTCCGCTATTGCCAGTACACCTCAACATTCCACCATGCATTACGATTAACGCCGCAT\n+AGCCAGTTGAACTTTGCTACTTTGTGAGAGGTAGTACCTTCTATCCAGTGCGAATTTAATTAATGGAATA\n+AATGATTATGAGTGAAAATGATACAATCCCAAAGAAGTCTACAAGTCAGATTAACAAAGCGGTATTCTTT\n+ACATCTGCTTTGCTAATTTTCC\n+\n'
b
diff -r 000000000000 -r 1c58de56d587 test-data/accessions_1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/accessions_1.tsv Wed Dec 04 07:01:37 2019 -0500
b
@@ -0,0 +1,2 @@
+CP011064
+CP021680