Repository 'sistr_cmd'
hg clone https://toolshed.g2.bx.psu.edu/repos/nml/sistr_cmd

Changeset 0:ebee10be4297 (2017-03-01)
Next changeset 1:9d7e381dfa5a (2017-03-03)
Commit message:
planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
added:
sistr_cmd.xml
test-data/13-1101 Paratyphi_B.fasta
test-data/13-1101-Paratyphi_B.fasta
test-data/AE014613-699860.fasta
test-data/alleles-output-13-1101.json
test-data/alleles-output.json
test-data/cgmlst-profiles-13-1101.csv
test-data/cgmlst-profiles.csv
test-data/novel-alleles-13-1101.fasta
test-data/novel-alleles.fasta
test-data/sistr-results-13-1101.tab
test-data/sistr-results.tab
b
diff -r 000000000000 -r ebee10be4297 sistr_cmd.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sistr_cmd.xml Wed Mar 01 12:35:39 2017 -0500
[
b'@@ -0,0 +1,331 @@\n+<tool id="sistr_cmd" name="sistr_cmd" version="0.3.4">\n+  <description>\n+    Salmonella In Silico Typing Resource commandline tool for serovar prediction\n+  </description>\n+  <requirements>\n+    <requirement type="package" version="0.3.4">sistr_cmd</requirement>\n+  </requirements>\n+  <stdio>\n+    <exit_code range="1:" />\n+  </stdio>\n+  <command><![CDATA[\n+  sistr \n+    #for $fasta in $input_fastas\n+      -i \'$fasta\' \'${$fasta.name.replace("." + $fasta.ext, "")}\'\n+    #end for\n+    -f $output_format\n+    #if $output_format == "tab"\n+      -o sistr-report.tab\n+    #elif $output_format == "csv"\n+      -o sistr-report.csv\n+    #elif $output_format == "json"\n+      -o sistr-report.json\n+    #end if\n+    -p $cgmlst_profiles\n+    -n $novel_alleles\n+    -a $alleles_output\n+    $use_full_cgmlst_db\n+    $no_cgmlst\n+    $run_mash\n+    $qc\n+    --threads "\\${GALAXY_SLOTS:-1}"\n+    -T "\\${TMPDIR:-/tmp}"\n+    $keep_tmp\n+    $verbosity\n+  ]]></command>\n+  <inputs>\n+    <param \n+      name="input_fastas" \n+      type="data" \n+      label="Input Genome(s)" \n+      optional="false" \n+      multiple="true"\n+      format="fasta"\n+      />\n+    <param \n+      name="output_format" \n+      type="select" \n+      label="Results output format"\n+      multiple="false">\n+      <option value="tab" selected="true">\n+        Tabular (tab-delimited values)\n+      </option>\n+      <option value="csv">\n+        CSV (Comma Separated Values)\n+      </option>\n+      <option value="json">\n+        JSON (JavaScript Object Notation)\n+      </option>\n+    </param>\n+    <param \n+      name="use_full_cgmlst_db"\n+      type="boolean"\n+      checked="false"\n+      truevalue="--use-full-cgmlst-db"\n+      falsevalue=""\n+      label="Use full cgMLST database for serovar prediction. About 10X slower with equivalent results to reduced centroid allele database."\n+      />\n+    <param \n+      name="run_mash"\n+      type="boolean"\n+      checked="true"\n+      truevalue="--run-mash"\n+      falsevalue=""\n+      label="Run Mash MinHash-based serovar prediction"\n+      />\n+    <param \n+      name="no_cgmlst"\n+      type="boolean"\n+      checked="false"\n+      truevalue="--no-cgmlst"\n+      falsevalue=""\n+      label="Skip running cgMLST-based serovar prediction"\n+      />\n+    <param \n+      name="qc"\n+      type="boolean"\n+      checked="true"\n+      truevalue="--qc"\n+      falsevalue=""\n+      label="Basic QC of results"\n+      />\n+    <param \n+      name="keep_tmp"\n+      type="boolean"\n+      checked="false"\n+      falsevalue=""\n+      truevalue="--keep-tmp"\n+      label="Keep temporary analysis directory"\n+      />\n+    <param \n+      name="verbosity"\n+      type="select" \n+      label="Logging verbosity">\n+      <option value="">\n+        Error messages only\n+      </option>\n+      <option value="-v">\n+        Show warning messages\n+      </option>\n+      <option value="-vv" selected="true">\n+        Show info messages\n+      </option>\n+      <option value="-vvv">\n+        Show debug messages\n+      </option>\n+    </param>\n+  </inputs>\n+  <outputs>\n+    <data \n+      name="output_prediction_csv" \n+      format="csv" \n+      label="SISTR Results"\n+      from_work_dir="sistr-report.csv">\n+      <filter>output_format == "csv"</filter>\n+    </data>\n+    <data \n+      name="output_prediction_json" \n+      format="json" \n+      label="SISTR Results"\n+      from_work_dir="sistr-report.json">\n+      <filter>output_format == "json"</filter>\n+    </data>\n+    <data \n+      name="output_prediction_tab" \n+      format="tabular" \n+      label="SISTR Results"\n+      from_work_dir="sistr-report.tab">\n+      <filter>output_format == "tab"</filter>\n+    </data>\n+    <data \n+      name="cgmlst_profiles" \n+      format="csv" \n+      label="cgMLST results" />\n+    <data\n+      name="novel_alleles"\n+      format="fasta" \n+      label="Novel cgMLST alleles" />\n+    <data \n+      name="alleles_output"\n+      format="json"\n+      label="cgMLST allele match results" />\n+  </outputs'..b' \n+        value="alleles-output-13-1101.json"\n+        ftype="json"\n+        compare="sim_size"/>\n+    </test>\n+  </tests>\n+  <help>\n+  <![CDATA[\n+\n+Usage::\n+\n+    usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT]\n+                     [-o OUTPUT_PREDICTION] [-p CGMLST_PROFILES]\n+                     [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K]\n+                     [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS]\n+                     [-v] [-V]\n+                     [F [F ...]]\n+\n+    SISTR (Salmonella In Silico Typing Resource) Command-line Tool\n+    ==============================================================\n+    Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST.\n+\n+    Note about using the "--use-full-cgmlst-db" flag:\n+        The "centroid" allele database is ~10% the size of the full set so analysis is much quicker with the "centroid" vs "full" set of alleles. Results between 2 cgMLST allele sets should not differ.\n+\n+    If you find this program useful in your research, please cite as:\n+\n+    The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies.\n+    Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada.\n+    PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101\n+\n+    positional arguments:\n+      F                     Input genome FASTA file\n+\n+    optional arguments:\n+      -h, --help            show this help message and exit\n+      -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name\n+                            fasta file path to genome name pair\n+      -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT\n+                            Output format (json, csv, pickle)\n+      -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION\n+                            SISTR serovar prediction output path\n+      -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES\n+                            Output CSV file destination for cgMLST allelic\n+                            profiles\n+      -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES\n+                            Output FASTA file destination of novel cgMLST alleles\n+                            from input genomes\n+      -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT\n+                            Output path of allele sequences and info to JSON\n+      -T TMP_DIR, --tmp-dir TMP_DIR\n+                            Base temporary working directory for intermediate\n+                            analysis files.\n+      -K, --keep-tmp        Keep temporary analysis files.\n+      --use-full-cgmlst-db  Use the full set of cgMLST alleles which can include\n+                            highly similar alleles. By default the smaller\n+                            "centroid" alleles or representative alleles are used\n+                            for each marker.\n+      --no-cgmlst           Do not run cgMLST serovar prediction\n+      -m, --run-mash        Determine Mash MinHash genomic distances to Salmonella\n+                            genomes with trusted serovar designations. Mash binary\n+                            must be in accessible via $PATH (e.g. /usr/bin).\n+      --qc                  Perform basic QC to provide level of confidence in\n+                            serovar prediction results.\n+      -t THREADS, --threads THREADS\n+                            Number of parallel threads to run sistr_cmd analysis.\n+      -v, --verbose         Logging verbosity level (-v == show warnings; -vvv ==\n+                            show debug info)\n+      -V, --version         show program\'s version number and exit\n+]]>\n+  \n+  </help>\n+  <citations>\n+    <!-- Citation for SISTR PLOS ONE paper -->\n+    <citation type="doi">10.1371/journal.pone.0147101</citation>\n+  </citations>\n+</tool>\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/13-1101 Paratyphi_B.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/13-1101 Paratyphi_B.fasta Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,126 @@\n+>10664704\n+CAATTCACGAATAACAGAGGTGATCTTTCTACCGAATTTAAAATAGAGCTCAATAGCACGAAGGCGCTCAGCTTTTGAATACATAAATTATCTCCTGAGAGTCCAGGAAATCGTCCGCACCCCCATTACCGTCTTATATCATTGGCGCTAACACCCGGGATAACGGAAGCCGCCTGCTGGGCTCATGCCCGCCGGAAGATCCACGATGTGCACGTCCGCACCCCGTCAGCTCTGACGGAAGAAGCCCTGAGGCAGATCGGTCAGTTGTACGCCATAGAGGCGAATATAAGGGGAATACCGGCGGAGCAGCGGCTTGCTGAACGTCAGCGAAAAACGAAACCGCTGCTGAAATCCCTGGAAAGCTGGTTGCGTGAAAAGATGAAGACCCTGTCGCGACACTCAGAGCTGGCGAAAGCGTTCGCGTACGCACTTAACCAGTGGCCGGCACTGACGTACTATGCGGACGATGGCTGGGCCGAAGCAGACAACAACAGTGCTGAAAATGCGCTGCGGACGGTCAGTCTGGGTCGTAAAAACTTCCTGTTCTTCGGCTCTGACCATGGTGGTGAGCGGGGAGCGCTGCTGTACAGCCTGATCGGGACGTGCAAACTGAACGGCGTGGGTCCAGAAAGCTACCTTCGCCATGTGCTTGGCGTCATAGCTGACTGGCCGGTCAACCGGGTCAGCGAACTGCTCCCCTGGCGCATAGCACTGCCAGCTGAATAACGCATCCCTGTCAATACGGCTCTCGCTGTACGCTTACGTTGAAGCAAGAAAGTAACATTAACTGGTGAGGCAACACGTTGGATAATATAAATGACGCCGGGTTATGCGAGTTTTCTTTCCTGTTGAATTTGAATCCGTGCATTTTTATTCAGTGCCTGAATAATACAGCTATAAAATCTGAAAAAAGTGAAAGTAATATGCCCGGGGCACTGGAAAAGCCGGTTACTCCCGGACATTAAGGCTCATAAATCAGTTAAGATCGTGAAGAATGCATATTAACGGCGCGTGGTGGTGGTTATTTTTGCCATCCTTTAAAAATAATTCTGTCCTGCTTTTCGATAACGACATGCTTCTCTATGCGAACATTTAAATACTGAGACGTGACCTGAAAACGACCTCTTGGTGCTTTAGATTGTGTGGATAACTGAGCGCCGGCATAGGCG\n+>10664706\n+ACGATCAACAACCTGTTTGACTGCTTCAATTTTAAACTCTTCGGGATAACGTTTACCGCTCATGGGCACCTCTCTTTAAGTCATTTTAAATGACTCTGAGGTGTCTGTTAAACCCGTGGCGATTCAATCAGAGTAATTTAGTTCTATCGATATTTTTCGCACCACTGGTTCAGGATACAAAGGTGGCAATTGACGATGATCGCCGACCAGTAACACCCTGCGACCCGACTGAATTGCTATAGCCAATTCGCCAGGTGTCGCACGAGCAGCTTCGTCGACTATCACCCAATCATAGCGATTTTTTGCTACACCAAATTGTGAGCGTCCTAGACCCACACAAGTCCCACATACAAGTGATCGGGTCTTCGCAAGAAATTCTTCAAAGTTACCTCTCAGAGTGCCTAAGCGATCTACCCACTCTTGAGACATTGCAATTACTTGCTCCAGTCTAGACACTGCATCTAAAGAAGTTACACCAAACTGATTCATTGTCTCACGGCTTAATTGATTAATCACTTCCTCTGGAGTTCCCTCCCCCGCATAACCAAACTTTTCAGATGCAATTTTTTTAAATCGTTCAAGTCGCTGATTTATTCGAGCTATCAGAGGATTATTATTAGCTTCTGATATTTCATTTTTATTTAGTTTTGTGGTTAATCTTTCTATTTCACGATTCAAACGCTTAAGTTGATATTCAATATCGAACCATCTTTCTACAAATTTGTTAGGGAGACCTAAATTTCGGTTCATTGCTGAAATTCTAACTCGCATTTCTGAGCGGAATAAGTCTCGATAATTTTGCAAAATGGAAGATGAATGATATGGATGAAGTTTTTCTGAGACCATTCCTTCTGCACCGAACCTAACAACATCTAGGGGCAGATTGCTGTGCTGACAAAGTTCAATTACTTTTTCTGCAGCATTATTAACTGCTTCATGTGACTGGCTTGCAAGAAGTATACTTTGGGCTCCTTGTGAGAGAGCATAATGGATAAATGAAGCAATGAATGAAGTTTTACCAGTGCCTGGAGGGCCTTGCAAAAGACTCAATGGGCCGTATGACCAAAGTTTAGAAAAGGCATCTCGTTGCTGTCGATTGAGTGAAAAAGATAGCTCTCCATCTTTATCATATCGATTATAAGCATCAAGATCAGAATCATTTGGCTCTGGTTGTAGATATTGTGGATGAGGGCAAGTTAATGGTTCAAAGTAGTTGATTAATGAGGGGATAGCCGATTCAGCATTTAAAATACGTGTAACTGCATGACGCCTACGAATAAAAGATGATAGATCTTGTTGACTTCGTAATTTCAAAGTGTCGCCAATATTAGTCTGAATGCGAATCTTTGGATTTTCAAGTACGAGAGTAGATTGGGTAGTCTCTCTTATATCAACATTCCCTACACGAATCAGTTCACCGTTTATTTCTTGTAATGCCTCAACTTTTGCATCTGATTCATAGTCTAAAGACTCTCCCTCTTTTCTGTAAGGTATTCGCAATTTCTCATGACGCTTATCCATATCCCTGACAACTGATGAGGTGATTTCAATTTCAGGTAAGGAATCTTCTTCTGCATCTAAAATTGCACGCCAAAGTGATGCTGTATGTGGTCTTGTTTGAGAAACAGTTAAAGTTTCTGAGTTCTCAATTTCTTCCTCATCAGAAGAAAAATCGCCACCGAGAATTCTATTCTGAATTCTCGATATTGCAGAGATAACCTCAGGTATTTTTTTACATGTTCTAATAACTTGCTAGGGTCATCCGCGGATGAGGGTTCGAATAGTATATTAGCTTCGAGTTGTGTAATGGCTTGTGATGCCATTCGTACAAATAAGCTATGGGCTATATCTTTAGTTCGTAAAAATGCAAAATCTAATTGGGTCGCTTTAAGATAGATTTGTAACTGTTTACGTACTCCGGCAAAAGCTACGATAATATGAGGCTGCTGCTTTGGCGAACGGACTCGTTCTTCACTAATACTGATATGGTATACACCATTGTCATTTATTAATTTCTGGCTGGAGGTCAGTTGCCTCATTAATACTGACAACCTCACTCCCTCATTTATTTGTGGAGGGTTGATTAACATCTCTATTTCATCATTTATTCGGTCAAGAGAATAAATTTTAAAATCCCGCCCCATACAACTTCTGATCTCATTAAGTAAAGCTGATGGATCAATCCCTTCCCAGTTAACGTCGTGTTCTAAAATCTCATTACACACTTTGGCCACAGCGTAGCAATCACGTTCCTCCATAGGCAAAGATTCGTAATCTGTAGGTACATATGCAGGAGTGAATATTATATTCTCACCTGAACAAGGTATATCAAGTGCATCAATGAAGCGAACATCTCCCACTTCTATTAGAATATTGTTAGGGTGCAGATCCCCATGTTGTAATTGCATAGCATGTAAATGTAAGACTGCCCTAACTATTTTTTTGCATAGTAATATCAATTCACGACCAACATGACATGATTTGACAGCGTCATTTAGGAATTCCCCATTCAACCATTTTTGGACTAAATAGGTTCCTGCATCTGAAACACCAAAATCAATAACTTCAGCAAGAGAAGAACAGGGCTGGGATTTTATTAGGCGAGCTTTATCGAGAAAATTCTGAAGTTGTAAAGCCTCTTCCGGTCGCTTTATATCTGGTTTCCGCCCATACCATACTTTAACCGAAACACTCTCACCAGAAAAAGTCGATTTATATAAATGACTTATGCCCTGCTTTATATTTTCCTCAATAGGATAAATAACCATTGGAATCAACTCACTTCGA'..b'TTCAGCGCCCGTAGCAACTGACGACGGGTATCCATAGGTAACAATGCATTACTCATCGGCGCACCTCTAACTGGTTATTTATCACACAGCCAAAGTAACTATTTTAATGAGGCAACAGTGCGCGTTGGGTAACAAAAATGCTGAGTCAATTCGCCGGACTGAGTTATTGTCATCTCATAGAGGGCGGTATTGCAGGTTTACGCTCAAGCAGCAGCGCATTCTGACGCATGATGCGGTATACGCGTTTGGCATTGATAATGGCCATGTCGTCAGTTTCTGATTGTCTGCGCAGCAGTGCCCACACCCGACAATAACCATACCTTGGCAGCTCGCCGATAACGGTATGGATACGGTCCAGCGCTTCAGTATCATCAGGCTTGCGCTTGCGTCGACGATCCTGCCAGCTCTTCGACCGACGGGCCATGGCATGCAGTTGCACACGTGAGACCCGGAGGCAACGGCTGACAAGGCTTATTCACCATCCTCCGGCAACAAGGGCACGTGCGCTATCCACTTTTTTTGGCGACCATATTCAACAGCTTCTTTCAGCAGCTCGTTTTCCATAGTTTTCTTGCCCAGCAGGCGCTGCAGTTCTTTAATTTGCTTCATTGCCGATGCCAGTTCCGACGCGGGCACAACCTGTTCACCCGCGACTACGGCGGTAAGACTGCCTTCCTGATACTGCTTGCGCTACAGGAACAGCTGACTGGCGGCAACACCATGCTGCCGGGCGACCAGCGACACGGTCATACCGGGTTCAAAGCTCTGCTGAACAATGGCGATTTTTTCCTGAACACTGCACCGTCTGCGCTTCTCTGGACCTAAAACATCAATCATCCGGACTCCAACGACTAGTCTAAAAAATAGTATTAAGACTATCACTAACTTAAGTGATACCAACTGTCTGGAGATTCATGGGGCTAGTCTAGGAAGGATGAAGAATCTCCGGCTAACACGGGTTATACGGTATACAAATGCGAACAAGCAAACCGGTATCAAAACGCGCAGAGGTTTTGAGTGTTATGCTTCCCCCCAGCCGTTGAACACAGGTTTTTACAATCGATAACCCCAGCCCTGAACCCGGCAGTGAAGTTCCCGCTGTGCGATAAAATGCTCCGAAAACCTTTTCTCTTTCTGATTCTGAAATCCCGGGTCCACTGTCTTCGACCTCAATAACTAAAAAATCCCTTTCCCACCCTACTGACAAATCAATACGTCCCTGCGGAGGGGTAAACCGTACCGCGTTCTCCGTAACGTTTTTAATGGCGGTATAAAGGGCATTTTTGTCAGTAACAATAACCGTAAGGACTTCAGGTTCTGAGGACTGCAGAATACCAATGTCGATATCTTTTTCCGTCGCAAGAGGCAGGATTGTCTCAATTACGCTGCGAAACAGTGAAACAACATCAACGGAGTCGTGATGAGTTTTGTTCTCATTTTGTTGTTCCCGGGCCAGCGAAAGCAGTTGTTCAAGCAGTCTTTTTTCACGCGAGATGCCTGTCTGAAGGACCGCAAGCCGCTGACGAGCCTCCTCAGACATCTCTGACGCGGAAAGTCGTTCTGCCTGAAGCGAAAGTGCAGTTAAAGGCGTTCGTAATTCATGTGCCGCATCGGCTATAAAACGTTTTTGTGTCTGAATGACCGCATCAACTCTGCTCAGCAGATCGTTTATACTAACGATAAATGGTTGTATTTCATCGGGAATGTTTTTGTCTCTGACGGGTGTAAGGTCGTTGTCATGACGGGAGCAAACAGCGTCGGCCACACGTTGAAGCGGTATAAAGGCTTTTCTGACGACGAACCAGGCCACTAACATAAAAACCGGAAACAGAATGAGGAATGGGATCAGAGAACGCAAAGCGCTGCTAATTGCAATATCATCCCGGACATCGGATTGTTGTCCGACCACGACTTTTGCTTCGGGTGAATACTGGGTGATCAGTATACGGTAGCGTTTATTCCCGGACTCCACATCCTGGAATCCGTCGCTCACCGTTGAGGGTAATTCAAAATGCATCGTTGAGGCGTGTACATGAGCGGCATTATGCGGAAGATATTCAACCAAAATTTTATCCTCGTTATCCCCTTCCATGCTCCGGCCGATCTGTTGAGGAATACGCTCATTCCCTGACGAGTATTTCAGCACCAGGGTAATCTGTTTCAGCGTATTGTCCTGTAATTCATGGGATTCATCGAGCGCCAGAAAAAACGTGAGTGCAGCTGAAATCGTCCCGGTGAACAGAATGGCGATAAAAAGTGACAGCGAAAGCCGGTATTTAACTGATTTCATCATTTGTTTTTAGGGACCAGCCAGCCCACACCTCTCACGTTCCTGATTGCGTCTCGGCCAATTTTCTTTCTCAGTGAGTGGATTAAAAACTCTATCGCATTACTCTCCACCTCTTCACCCCAGCCATAGATACGATCTTCAAGGTCAGACCGGGATAAGATCCCCCCTGGTCTGATGAGAAGCGCCTCCAGCACTGCAAACTCACGGTTTGATAACAGAAAGGACAATCCGGAAGTGATGATCTGCACTTCATGGCTGTCAGGATCAAGTATCAGCTCGCCGTTGGTCAGCACCGGAGATTCGCTACCACTATTTCTGCGCAGAACAGCATGAATTCGAGCCACCAGTTCGCTCAAATCAAAGGGTTTAATAATATAATCATCGGCGCCGCCATTCAGTCCCCTGAGACGATCCTCCAGTGCATCACGTGCGGTCATTATCAGAACCGAAACCGTTGCGTTGTGTTTACGCATTCCGGCCATAACACTGAAGCCGTCCTTGCCTGGCAGACCAAGATCAAGCAGAACAAGATCATAATGTTGAGTACCAAAGGTTGAGAGGGCATCATTGCCATTTTTGACCCAGTCCACTGCAAATGAAGCTTCTCTCAGAGACTCCGCGACAATCTCACCAATCATGGCATCATCTTCAACGAGCAATATTCGCATTTAGGGGATCCCACACTAAAATTCGCCTCAACATCCGGCCGATAGCCTGAACCGGCAACGGTGATAAACCAGTACGTTTCGTTTATCCTGCCAGACAGTAGCCGATGAACCTTAACGTTTTATCTTATAAAAGCAGCAGGGTTGTAATTTTACAACCCTGCATATGGTTTACGGATTAATATGATCATGACCAATTGTAAATGGGGCTTCACAGCTGTGGTATGAAGGATTACAGTAGTTTTTATCCGCACCATTTTTTTTCGAAATCTTGTCCTTCCCGACAGTAAACGGGGCTTCACACTTATGCTGACCAGGGTTACAGTCAGCATTACCGGCATATGCTTCTGATGTCAGAACTGCCATTGTCAGCCCGGAGCAAAAGAGCATCATTGCTGCCGGCAATAATTTCTGTGTCTTTTTCATGTCAGTCACCTTATTTATACATATCGGCTTCGATGGTTTTATGGTCCTGATGGCCCTGTTCACCGATAATCACGTAATATTTACCGCCATGTTTTGCCACTGCATCTTTAAGATCCTGATTGTCCATTGTAAATCCATCCTGAGTTACGGCAATGTTTCCGATTTTATCAAGACCTGCAGCCCGGCTACGGGTAAGTTCCTGAGGGTGAATAGGTGACATGACAGAATTTGTCACGCTGGCATATACGCTGCCTGTTGCCGTAGCCATTAACAGTGCGGTTACAGCCATCAGTGTTGTACGTTTCATGATAAACCTCCTGCTGGGTGCTATTTATATTCAGGCTTTGTGGCCTGAAAGAAAAATTACAGGAGAAAACTTAGGGCATACTGAGCAACTCAATATTTTATGAAAAAATCGGGGGCGTACTACCGGTAATGATTGAAATCTGACGGTAGGGACATAACAGCAGATCTGTCAATACCTGCACAGCGATGAATCGCCACGGATAATCTAGACACTTCCGAGCCGTTGATAATACTGGTTTTCATATTCTGTCGGTGACATCTGATTGCTCGAACCATGCCGACGCTTACTGTTATAAAACATTTCGATGTAA\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/13-1101-Paratyphi_B.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/13-1101-Paratyphi_B.fasta Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,126 @@\n+>10664704\n+CAATTCACGAATAACAGAGGTGATCTTTCTACCGAATTTAAAATAGAGCTCAATAGCACGAAGGCGCTCAGCTTTTGAATACATAAATTATCTCCTGAGAGTCCAGGAAATCGTCCGCACCCCCATTACCGTCTTATATCATTGGCGCTAACACCCGGGATAACGGAAGCCGCCTGCTGGGCTCATGCCCGCCGGAAGATCCACGATGTGCACGTCCGCACCCCGTCAGCTCTGACGGAAGAAGCCCTGAGGCAGATCGGTCAGTTGTACGCCATAGAGGCGAATATAAGGGGAATACCGGCGGAGCAGCGGCTTGCTGAACGTCAGCGAAAAACGAAACCGCTGCTGAAATCCCTGGAAAGCTGGTTGCGTGAAAAGATGAAGACCCTGTCGCGACACTCAGAGCTGGCGAAAGCGTTCGCGTACGCACTTAACCAGTGGCCGGCACTGACGTACTATGCGGACGATGGCTGGGCCGAAGCAGACAACAACAGTGCTGAAAATGCGCTGCGGACGGTCAGTCTGGGTCGTAAAAACTTCCTGTTCTTCGGCTCTGACCATGGTGGTGAGCGGGGAGCGCTGCTGTACAGCCTGATCGGGACGTGCAAACTGAACGGCGTGGGTCCAGAAAGCTACCTTCGCCATGTGCTTGGCGTCATAGCTGACTGGCCGGTCAACCGGGTCAGCGAACTGCTCCCCTGGCGCATAGCACTGCCAGCTGAATAACGCATCCCTGTCAATACGGCTCTCGCTGTACGCTTACGTTGAAGCAAGAAAGTAACATTAACTGGTGAGGCAACACGTTGGATAATATAAATGACGCCGGGTTATGCGAGTTTTCTTTCCTGTTGAATTTGAATCCGTGCATTTTTATTCAGTGCCTGAATAATACAGCTATAAAATCTGAAAAAAGTGAAAGTAATATGCCCGGGGCACTGGAAAAGCCGGTTACTCCCGGACATTAAGGCTCATAAATCAGTTAAGATCGTGAAGAATGCATATTAACGGCGCGTGGTGGTGGTTATTTTTGCCATCCTTTAAAAATAATTCTGTCCTGCTTTTCGATAACGACATGCTTCTCTATGCGAACATTTAAATACTGAGACGTGACCTGAAAACGACCTCTTGGTGCTTTAGATTGTGTGGATAACTGAGCGCCGGCATAGGCG\n+>10664706\n+ACGATCAACAACCTGTTTGACTGCTTCAATTTTAAACTCTTCGGGATAACGTTTACCGCTCATGGGCACCTCTCTTTAAGTCATTTTAAATGACTCTGAGGTGTCTGTTAAACCCGTGGCGATTCAATCAGAGTAATTTAGTTCTATCGATATTTTTCGCACCACTGGTTCAGGATACAAAGGTGGCAATTGACGATGATCGCCGACCAGTAACACCCTGCGACCCGACTGAATTGCTATAGCCAATTCGCCAGGTGTCGCACGAGCAGCTTCGTCGACTATCACCCAATCATAGCGATTTTTTGCTACACCAAATTGTGAGCGTCCTAGACCCACACAAGTCCCACATACAAGTGATCGGGTCTTCGCAAGAAATTCTTCAAAGTTACCTCTCAGAGTGCCTAAGCGATCTACCCACTCTTGAGACATTGCAATTACTTGCTCCAGTCTAGACACTGCATCTAAAGAAGTTACACCAAACTGATTCATTGTCTCACGGCTTAATTGATTAATCACTTCCTCTGGAGTTCCCTCCCCCGCATAACCAAACTTTTCAGATGCAATTTTTTTAAATCGTTCAAGTCGCTGATTTATTCGAGCTATCAGAGGATTATTATTAGCTTCTGATATTTCATTTTTATTTAGTTTTGTGGTTAATCTTTCTATTTCACGATTCAAACGCTTAAGTTGATATTCAATATCGAACCATCTTTCTACAAATTTGTTAGGGAGACCTAAATTTCGGTTCATTGCTGAAATTCTAACTCGCATTTCTGAGCGGAATAAGTCTCGATAATTTTGCAAAATGGAAGATGAATGATATGGATGAAGTTTTTCTGAGACCATTCCTTCTGCACCGAACCTAACAACATCTAGGGGCAGATTGCTGTGCTGACAAAGTTCAATTACTTTTTCTGCAGCATTATTAACTGCTTCATGTGACTGGCTTGCAAGAAGTATACTTTGGGCTCCTTGTGAGAGAGCATAATGGATAAATGAAGCAATGAATGAAGTTTTACCAGTGCCTGGAGGGCCTTGCAAAAGACTCAATGGGCCGTATGACCAAAGTTTAGAAAAGGCATCTCGTTGCTGTCGATTGAGTGAAAAAGATAGCTCTCCATCTTTATCATATCGATTATAAGCATCAAGATCAGAATCATTTGGCTCTGGTTGTAGATATTGTGGATGAGGGCAAGTTAATGGTTCAAAGTAGTTGATTAATGAGGGGATAGCCGATTCAGCATTTAAAATACGTGTAACTGCATGACGCCTACGAATAAAAGATGATAGATCTTGTTGACTTCGTAATTTCAAAGTGTCGCCAATATTAGTCTGAATGCGAATCTTTGGATTTTCAAGTACGAGAGTAGATTGGGTAGTCTCTCTTATATCAACATTCCCTACACGAATCAGTTCACCGTTTATTTCTTGTAATGCCTCAACTTTTGCATCTGATTCATAGTCTAAAGACTCTCCCTCTTTTCTGTAAGGTATTCGCAATTTCTCATGACGCTTATCCATATCCCTGACAACTGATGAGGTGATTTCAATTTCAGGTAAGGAATCTTCTTCTGCATCTAAAATTGCACGCCAAAGTGATGCTGTATGTGGTCTTGTTTGAGAAACAGTTAAAGTTTCTGAGTTCTCAATTTCTTCCTCATCAGAAGAAAAATCGCCACCGAGAATTCTATTCTGAATTCTCGATATTGCAGAGATAACCTCAGGTATTTTTTTACATGTTCTAATAACTTGCTAGGGTCATCCGCGGATGAGGGTTCGAATAGTATATTAGCTTCGAGTTGTGTAATGGCTTGTGATGCCATTCGTACAAATAAGCTATGGGCTATATCTTTAGTTCGTAAAAATGCAAAATCTAATTGGGTCGCTTTAAGATAGATTTGTAACTGTTTACGTACTCCGGCAAAAGCTACGATAATATGAGGCTGCTGCTTTGGCGAACGGACTCGTTCTTCACTAATACTGATATGGTATACACCATTGTCATTTATTAATTTCTGGCTGGAGGTCAGTTGCCTCATTAATACTGACAACCTCACTCCCTCATTTATTTGTGGAGGGTTGATTAACATCTCTATTTCATCATTTATTCGGTCAAGAGAATAAATTTTAAAATCCCGCCCCATACAACTTCTGATCTCATTAAGTAAAGCTGATGGATCAATCCCTTCCCAGTTAACGTCGTGTTCTAAAATCTCATTACACACTTTGGCCACAGCGTAGCAATCACGTTCCTCCATAGGCAAAGATTCGTAATCTGTAGGTACATATGCAGGAGTGAATATTATATTCTCACCTGAACAAGGTATATCAAGTGCATCAATGAAGCGAACATCTCCCACTTCTATTAGAATATTGTTAGGGTGCAGATCCCCATGTTGTAATTGCATAGCATGTAAATGTAAGACTGCCCTAACTATTTTTTTGCATAGTAATATCAATTCACGACCAACATGACATGATTTGACAGCGTCATTTAGGAATTCCCCATTCAACCATTTTTGGACTAAATAGGTTCCTGCATCTGAAACACCAAAATCAATAACTTCAGCAAGAGAAGAACAGGGCTGGGATTTTATTAGGCGAGCTTTATCGAGAAAATTCTGAAGTTGTAAAGCCTCTTCCGGTCGCTTTATATCTGGTTTCCGCCCATACCATACTTTAACCGAAACACTCTCACCAGAAAAAGTCGATTTATATAAATGACTTATGCCCTGCTTTATATTTTCCTCAATAGGATAAATAACCATTGGAATCAACTCACTTCGA'..b'TTCAGCGCCCGTAGCAACTGACGACGGGTATCCATAGGTAACAATGCATTACTCATCGGCGCACCTCTAACTGGTTATTTATCACACAGCCAAAGTAACTATTTTAATGAGGCAACAGTGCGCGTTGGGTAACAAAAATGCTGAGTCAATTCGCCGGACTGAGTTATTGTCATCTCATAGAGGGCGGTATTGCAGGTTTACGCTCAAGCAGCAGCGCATTCTGACGCATGATGCGGTATACGCGTTTGGCATTGATAATGGCCATGTCGTCAGTTTCTGATTGTCTGCGCAGCAGTGCCCACACCCGACAATAACCATACCTTGGCAGCTCGCCGATAACGGTATGGATACGGTCCAGCGCTTCAGTATCATCAGGCTTGCGCTTGCGTCGACGATCCTGCCAGCTCTTCGACCGACGGGCCATGGCATGCAGTTGCACACGTGAGACCCGGAGGCAACGGCTGACAAGGCTTATTCACCATCCTCCGGCAACAAGGGCACGTGCGCTATCCACTTTTTTTGGCGACCATATTCAACAGCTTCTTTCAGCAGCTCGTTTTCCATAGTTTTCTTGCCCAGCAGGCGCTGCAGTTCTTTAATTTGCTTCATTGCCGATGCCAGTTCCGACGCGGGCACAACCTGTTCACCCGCGACTACGGCGGTAAGACTGCCTTCCTGATACTGCTTGCGCTACAGGAACAGCTGACTGGCGGCAACACCATGCTGCCGGGCGACCAGCGACACGGTCATACCGGGTTCAAAGCTCTGCTGAACAATGGCGATTTTTTCCTGAACACTGCACCGTCTGCGCTTCTCTGGACCTAAAACATCAATCATCCGGACTCCAACGACTAGTCTAAAAAATAGTATTAAGACTATCACTAACTTAAGTGATACCAACTGTCTGGAGATTCATGGGGCTAGTCTAGGAAGGATGAAGAATCTCCGGCTAACACGGGTTATACGGTATACAAATGCGAACAAGCAAACCGGTATCAAAACGCGCAGAGGTTTTGAGTGTTATGCTTCCCCCCAGCCGTTGAACACAGGTTTTTACAATCGATAACCCCAGCCCTGAACCCGGCAGTGAAGTTCCCGCTGTGCGATAAAATGCTCCGAAAACCTTTTCTCTTTCTGATTCTGAAATCCCGGGTCCACTGTCTTCGACCTCAATAACTAAAAAATCCCTTTCCCACCCTACTGACAAATCAATACGTCCCTGCGGAGGGGTAAACCGTACCGCGTTCTCCGTAACGTTTTTAATGGCGGTATAAAGGGCATTTTTGTCAGTAACAATAACCGTAAGGACTTCAGGTTCTGAGGACTGCAGAATACCAATGTCGATATCTTTTTCCGTCGCAAGAGGCAGGATTGTCTCAATTACGCTGCGAAACAGTGAAACAACATCAACGGAGTCGTGATGAGTTTTGTTCTCATTTTGTTGTTCCCGGGCCAGCGAAAGCAGTTGTTCAAGCAGTCTTTTTTCACGCGAGATGCCTGTCTGAAGGACCGCAAGCCGCTGACGAGCCTCCTCAGACATCTCTGACGCGGAAAGTCGTTCTGCCTGAAGCGAAAGTGCAGTTAAAGGCGTTCGTAATTCATGTGCCGCATCGGCTATAAAACGTTTTTGTGTCTGAATGACCGCATCAACTCTGCTCAGCAGATCGTTTATACTAACGATAAATGGTTGTATTTCATCGGGAATGTTTTTGTCTCTGACGGGTGTAAGGTCGTTGTCATGACGGGAGCAAACAGCGTCGGCCACACGTTGAAGCGGTATAAAGGCTTTTCTGACGACGAACCAGGCCACTAACATAAAAACCGGAAACAGAATGAGGAATGGGATCAGAGAACGCAAAGCGCTGCTAATTGCAATATCATCCCGGACATCGGATTGTTGTCCGACCACGACTTTTGCTTCGGGTGAATACTGGGTGATCAGTATACGGTAGCGTTTATTCCCGGACTCCACATCCTGGAATCCGTCGCTCACCGTTGAGGGTAATTCAAAATGCATCGTTGAGGCGTGTACATGAGCGGCATTATGCGGAAGATATTCAACCAAAATTTTATCCTCGTTATCCCCTTCCATGCTCCGGCCGATCTGTTGAGGAATACGCTCATTCCCTGACGAGTATTTCAGCACCAGGGTAATCTGTTTCAGCGTATTGTCCTGTAATTCATGGGATTCATCGAGCGCCAGAAAAAACGTGAGTGCAGCTGAAATCGTCCCGGTGAACAGAATGGCGATAAAAAGTGACAGCGAAAGCCGGTATTTAACTGATTTCATCATTTGTTTTTAGGGACCAGCCAGCCCACACCTCTCACGTTCCTGATTGCGTCTCGGCCAATTTTCTTTCTCAGTGAGTGGATTAAAAACTCTATCGCATTACTCTCCACCTCTTCACCCCAGCCATAGATACGATCTTCAAGGTCAGACCGGGATAAGATCCCCCCTGGTCTGATGAGAAGCGCCTCCAGCACTGCAAACTCACGGTTTGATAACAGAAAGGACAATCCGGAAGTGATGATCTGCACTTCATGGCTGTCAGGATCAAGTATCAGCTCGCCGTTGGTCAGCACCGGAGATTCGCTACCACTATTTCTGCGCAGAACAGCATGAATTCGAGCCACCAGTTCGCTCAAATCAAAGGGTTTAATAATATAATCATCGGCGCCGCCATTCAGTCCCCTGAGACGATCCTCCAGTGCATCACGTGCGGTCATTATCAGAACCGAAACCGTTGCGTTGTGTTTACGCATTCCGGCCATAACACTGAAGCCGTCCTTGCCTGGCAGACCAAGATCAAGCAGAACAAGATCATAATGTTGAGTACCAAAGGTTGAGAGGGCATCATTGCCATTTTTGACCCAGTCCACTGCAAATGAAGCTTCTCTCAGAGACTCCGCGACAATCTCACCAATCATGGCATCATCTTCAACGAGCAATATTCGCATTTAGGGGATCCCACACTAAAATTCGCCTCAACATCCGGCCGATAGCCTGAACCGGCAACGGTGATAAACCAGTACGTTTCGTTTATCCTGCCAGACAGTAGCCGATGAACCTTAACGTTTTATCTTATAAAAGCAGCAGGGTTGTAATTTTACAACCCTGCATATGGTTTACGGATTAATATGATCATGACCAATTGTAAATGGGGCTTCACAGCTGTGGTATGAAGGATTACAGTAGTTTTTATCCGCACCATTTTTTTTCGAAATCTTGTCCTTCCCGACAGTAAACGGGGCTTCACACTTATGCTGACCAGGGTTACAGTCAGCATTACCGGCATATGCTTCTGATGTCAGAACTGCCATTGTCAGCCCGGAGCAAAAGAGCATCATTGCTGCCGGCAATAATTTCTGTGTCTTTTTCATGTCAGTCACCTTATTTATACATATCGGCTTCGATGGTTTTATGGTCCTGATGGCCCTGTTCACCGATAATCACGTAATATTTACCGCCATGTTTTGCCACTGCATCTTTAAGATCCTGATTGTCCATTGTAAATCCATCCTGAGTTACGGCAATGTTTCCGATTTTATCAAGACCTGCAGCCCGGCTACGGGTAAGTTCCTGAGGGTGAATAGGTGACATGACAGAATTTGTCACGCTGGCATATACGCTGCCTGTTGCCGTAGCCATTAACAGTGCGGTTACAGCCATCAGTGTTGTACGTTTCATGATAAACCTCCTGCTGGGTGCTATTTATATTCAGGCTTTGTGGCCTGAAAGAAAAATTACAGGAGAAAACTTAGGGCATACTGAGCAACTCAATATTTTATGAAAAAATCGGGGGCGTACTACCGGTAATGATTGAAATCTGACGGTAGGGACATAACAGCAGATCTGTCAATACCTGCACAGCGATGAATCGCCACGGATAATCTAGACACTTCCGAGCCGTTGATAATACTGGTTTTCATATTCTGTCGGTGACATCTGATTGCTCGAACCATGCCGACGCTTACTGTTATAAAACATTTCGATGTAA\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/AE014613-699860.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/AE014613-699860.fasta Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,9999 @@\n+>gi|29140506|gb|AE014613.1| Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome\n+AGAGATTACGTCTGGTTGCAAGAGATCATAACAGGGGAAATTGATTGAAAATAAATATATCGCCAGCAGC\n+ACATGAACAAGTTTCGGAATGTGATCAATTTAAAAATTTATTGACTTAGGCGGGCAGATACTTTAACCAA\n+TATAGGAATACAAGACAGACAAATAAAAATGACAGAGTACACAACATCCATGAACCGCATCAGCACCACC\n+ACCATTACCACCATCACCATTACCACAGGTAACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAG\n+CCCGCACCTGAACAGTGCGGGCTTTTTTTTCGACCAGAGATCACGAGGTAACAACCATGCGAGTGTTGAA\n+GTTCGGCGGTACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATTCC\n+AGGCAAGGGCAGGTAGCGACCGTACTTTCCGCCCCCGCGAAAATTACCAACCATCTGGTGGCGATGATTG\n+AAAAAACTATCGGCGGCCAGGATGCTTTGCCGAATATCAGCGATGCCGAACGTATTTTTTCTGACCTGCT\n+CGCAGGACTTGCCAGCGCGCAGCCGGGATTCCCGCTTGCACGGTTGAAAATGGTTGTCGAACAAGAATTC\n+GCTCAGATCAAACATGTTTTGCATGGTATCAGCCTGCTGGGTCAGTGCCCGGATAGCATCAACGCCGCGC\n+TGATTTGCCGTGGCGAAAAAATGTCGATCGCGATTATGGCGGGACTCCTGGAGGCGCGTGGACATCGCGT\n+CACGGTGATCGATCCGGTAGAAAAACTGCTGGCGGTGGGCCATTACCTTGAATCTACCGTCGATATCGCG\n+GAATCGACTCGCCGTATCGCCGCCAGCCAGATCCCGGCCGATCACATGATCCTGATGGCGGGCTTTACTG\n+CCGGTAATGAAAAGGGTGAACTGGTGGTGCTGGGCCGTAATGGTTCCGACTATTCCGCCGCCGTGCTGGC\n+CGCCTGTTTACGCGCTGACTGCTGTGAAATCTGGACTGACGTCGATGGCGTGTATACCTGTGACCCGCGC\n+CAGGTGCCGGACGCCAGGCTGTTGAAATCGATGTCCTACCAGGAAGCGATGGAGCTCTCTTACTTCGGCG\n+CTAAAGTCCTTCACCCTCGCACCATAACGCCTATCGCCCAGTTCCAGATCCCCTGTCTGATTAAAAATAC\n+CGGCAATCCGCAGGCGCCAGGAACGCTGATCGGCGCGTCCAGCGACGATGATAATCTGCCGGTTAAAGGG\n+ATCTCTAACCTTAACAACATGGCGATGTTTAGCGTCTCCGGCCCGGGAATGAAAGGGATGATTGGGATGG\n+CGGCGCGTGTTTTCGCCGCCATGTCTCGCGCCGGGATCTCGGTGGTGCTCATTACCCAGTCCTCCTCTGA\n+GTACAGCATCAGCTTCTGTGTGCCGCAGAGTGACTGCGCGCGTGCCCGCCGTGCGATGCAGGATGAGTTC\n+TATCTGGAGCTGAAAGAGGGGCTGCTGGAGCCGCTGGCGGTTACGGAGCGGTTGGCGATTATCTCTGTTG\n+TCGGCGACGGTATGCGCACGCTACGCGGCATTTCAGCGAAATTCTTCGCCGCGCTGGCGCGGGCTAATAT\n+CAATATCGTGGCGATCGCTCAGGGATCTTCTGAGCGTTCCATTTCTGTGGTGGTGAATAACGACGATGCC\n+ACCACCGGCGTGCGGGTAACGCACCAGATGCTGTTCAATACCGATCAGGTGATTGAAGTGTTTGTCATTG\n+GCGTCGGCGGCGTCGGCGGCGCGCTACTGGAACAGCTTAAACGTCAGCAAACCTGGCTGAAGAACAAGCA\n+CATCGATCTACGCGTGTGCGGCGTGGCGAACTCAAAGGCGTTGCTAACCAATGTGCATGGCCTGAATCTG\n+GACAACTGGCAGGCGGAACTGGCGCAAGCGAACGCGCCGTTCAATCTGGGACGTTTAATTCGCCTGGTGA\n+AAGAATATCATCTACTCAATCCGGTGATTGTTGATTGTACCTCCAGTCAGGCGGTGGCCGACCAGTATGC\n+CGACTTCCTGCGCGAAGGGTTCCATGTGGTGACGCCGAACAAGAAAGCGAACACCTCGTCGATGGACTAC\n+TACCATCAGCTACGTTTCGCCGCCGCGCAATCACGGCGCAAATTCTTGTATGACACCAACGTCGGCGCCG\n+GTTTGCCGGTAATCGAAAACCTGCAAAACCTGCTGAATGCGGGTGATGAACTGCAAAAATTTTCCGGCAT\n+TCTTTCCGGGTCGCTCTCTTTTATTTTCGGTAAACTGGAAGAGGGGATGAGTCTCTCACAGGCGACCGCT\n+CTGGCGCGCGAGATGGGCTATACCGAACCCGATCCGCGCGACGATCTTTCCGGTATGGATGTGGCGCGTA\n+AACTGTTGATCCTCGCCCGCGAGACGGGCCGCGAGCTGGAGCTTTCCGATATCGTGATTGAACCGGTGTT\n+GCCGGACGAGTTTGACGCCTCCGGCGATGTGACCGCCTTTATGGCGCATCTGCCGCAGCTTGACGACGCG\n+TTTGCCGCCCGTGTGGCGAAAGCTCGTGATGAAGGTAAGGTATTGCGCTATGTGGGCAATATCGAAGAGG\n+ATGGCGTGTGCCGCGTGAAGATTGCCGAAGTTGATGGTAACGATCCGCTCTTCAAAGTGAAAAACGGTGA\n+AAACGCGCTGGCGTTCTACAGCCATTATTATCAGCCCTTGCCGTTGGTGCTGCGCGGCTACGGCGCAGGC\n+AATGATGTGACGGCGGCGGGCGTGTTTGCCGATCTGTTACGGACCCTCTCATGGAAGTTAGGAGTTTAAC\n+ATGGTGAAAGTGTATGCCCCGGCTTCCAGCGCGAACATGAGCGTCGGTTTCGACGTGTTGGGCGCGGCCG\n+TCACACCCGTTGACGGCACGTTGCTGGGCGATGTGGTATCCGTTGAAGCAGCGGATCATTTCCGTCTGCA\n+TAACCTGGGGCGATTTGCCGATAAACTGCCGCCGGAGCCGCGTGAAAATATTGTTTATCAGTGCTGGGAA\n+CGTTTTTGCCAGGCATTGGGGAAAACCATCCCGGTGGCGATGACGCTGGAAAAAAATATGCCGATTGGTT\n+CCGGGTTAGGGTCCAGCGCCTGTTCCGTCGTCGCCGCGCTGGTCGCGATGAATGAGCACTGCGGCAAACC\n+GTTAAACGACACGCGTCTGTTGGCGCTGATGGGCGAGCTGGAAGGCCGTATCTCCGGCAGCATCCATTAC\n+GATAACGTCGCGCCGTGCTTTCTTGGCGGTATGCAGTTGATGATTGAAGAAAACGGCATTATTAGTCAGC\n+AGGTGCCGGGCTTTGATGAGTGGCTATGGGTACTGGCTTATCCGGGCATTAAAGTTTCCACCGCAGAAGC\n+ACGGGCCATTTTGCCTGCGCAGTATCGCCGTCAGGATTGCATTGCGCATGGACGGCATCTGGCCGGTTTT\n+ATTCACGCCTGTTACTCGCGGCAGCCGCAGCTTGCCGCCGCGCTGATGAAAGATGTTATTGCCGAACCAT\n+ACCGCGCGCGTTTACTGCCGGGCTTTAGCCAGGCGCGGCAGGCGGTGTCGGAGATCGGCGCGCTGGCGAG\n+CGGGATTTCCGGATCGGGGCCGACGCTGTTTGCGCTATGCGATAAACCGGAGACGGCGCAGCGCGTCGCG\n+GACTGGCTGAGCAAACATTATCTGCAAAATCAGGAAGGCTTCGTTCATATTTGTCGGCTGGATACGGCGG\n+GCGCACGAGTAGTGGGATAATCAATGAAACTCTATAATCTGAAAGACCATAATGAGCAGGTCA'..b'GCACCAGGGCTCGACGCGGATGCAGCTGAGTATCGACC\n+CGGCGATGCGCTCGCCGCTGCGCTGGGCCATCGCCGGCTTCCTGCTGCTCTTTATGACGCTGGCGCTGAT\n+GCGGATGCGCAACCTGATTTTACTGATGGAAAAACGCCGCCCGTGGGTGAGCGAACTGATACTGAAAAGG\n+GGGCACCGGTGAGTCCGGCATTTTCATCGTGGAGCGATTTTTTCGCCATGGGCGGGTACGCCTTTTTTGT\n+CTGGCTGGCGGTGGCGATGACCGTGGCGCCGCTGGCGCTGCTGGCGCTGCACACGGTGCTGCAGCGCCGG\n+GCCATTCTGCGCGGCGTGGCGCAGCAGCGGGCGCGCGAGGCGCGGATGCGTGCCGCACAGGCGCAACAGG\n+AGGCCGCGTGAACCTGCGACGTAAAAACCGGCTATGGGTGGTCTGCGCGGTGCTGGCGGGCCTGGCGCTG\n+ACCACCGCCCTGGTCCTGTACGCGCTGCGCGCGAATATCGACCTGTTCTATACCCCCGGCGAAATCCTCT\n+ACGGCAAGCGCGAGACGCAGCAGCTGCCGGCGGTGGGCCAGCGCCTGCGCGTCGGCGGGATGGTGATGCC\n+CGGCAGCGTCAGGCGCGACCCGGACTCGCTGAAGGTGAACTTCAGCCTCTACGACGCCGAAGGGTCGGTG\n+ACGGTGAGCTATGAGGGGATACTGCCGGACCTGTTCCGCGAGGGGCAGGGGGTGGTGGTGCAGGGCACCC\n+TGGAGAAGGGCAACCACGTCCTGGCGCACGAGGTGCTGGCCAAGCATGACGAGAACTACACCCCGCCGGA\n+AGTGGAAAAGGCGATGCAGGAAAACCACCGCCGCCCGCAACGCGCTGATAAGGACACTTCATCATGATGC\n+CTGAATACGGCCACGCACTGCTGTGCCTGGCGCTCGGCGTGGCGCTGCTGCTGTCCGTTTACCCGCTGTG\n+GGGCGTGGCGCGCGGCGACGCGCGGATGATGGCGTCGGCCGGGGTGTTCGCCTGGCTGCTGTTCATCTGC\n+GTGGCGGGCGCGTTTTTCGTGCTGGTGCACGCCTTTGTGGTTAACGACTTCACCGTGGCCTATGTCGCCG\n+GCAACTCGAACACGCAGCTGCCGGTGTGGTACCGGGTGGCCGCCACCTGGGGGGCGCACGAGGGCTCGCT\n+GCTGCTGTGGGTGCTGCTGATGAGCGGCTGGACCCTGGCGGTGGCGGTGTTCAGCCGGCAGGTGCCGGCG\n+GATATCGTCGCCCGGGTGCTGGCGGTGATGGGGATGGTCTGCGCCGGTTTTCTGGCGTTCATCCTGTTCA\n+CCTCCGGCCCGTTTGCCCGCACGCTGCCGGCCTTTCCGGTGGAGGGGCGCGACCTGAACCCGCTGCTGCA\n+GGACCCGGGGCTGATTTTCCACCCGCCGCTGCTGTACATGGGCTATGTCGGCTTCTCGGTGGCCTTCGCC\n+TTCGCCATCGCCGCGCTGCTGAGCGGGCGTCTGGACAGCGCGTTCACCCGTTTTGCCCGCCCGTGGACGC\n+TGGCGGCGTGGGTGTTCCTGACGCTGGGCATCGTGCTCGGCTCGGCGTGGGCCTACTACGAGCTGGGCTG\n+GGGCGGCTGGTGGTTCTGGGACCCGGTGGAGAACGCCTCCTTTATGCCGTGGCTGGCGGGCACCGCCCTG\n+CTGCACTCGCTGGCGGTCACCGAACAGCGCGCCGGCTTTAAGGCGTGGACGCTGCTGTTGTCCATCTGCG\n+CCTTCTCGCTGTGCCTGCTGGGCACCTTCCTGGTGCGCTCCGGGGTGCTGGTGTCGGTGCACGCCTTCGC\n+CTCCGACCCGGCGCGCGGAATGTTTATCCTCGCCTTTATGGTGCTGGTCACCGGCGGCTCGCTGCTGCTG\n+TTCGCCGTGCGCGGGCACAGGGTGCGTTCGCGGGTGAACAACGCGCTGTGGTCGCGTGAGTCGCTGCTGC\n+TCGGCAACAACGTCCTGCTGATGGCCGCCATGCTGGTGGTGCTGCTGGGTACCCTGCTGCCGCTGGTGCA\n+CAAACAGCTGGGGCTGGGCAGCATTTCGGTGGGGGAGCCGTTCTTTAACACCATGTTCACCTGGCTGATG\n+GTCCCCTTTGCCCTGCTGCTGGGGGTGGGGCCACTGGTGCGCTGGGGCCGGGACCGGCCGCGTAACATCA\n+GGAAACTGCTGCTCACCGCCCTGGTCTCCACCCTGGTGCTGTCGGTACTTTTGCCATGGCTGCTGGAAGA\n+TAGAATCATCGCCATGACGGCGGTGGGGATGGCGATGGCCTGCTGGATTGCGGTGCTGGCGGTGGCCGAA\n+GCCGTACAGCGCGTGTCCCGCGGCACGAAAACCTCTCTCAGCTACTGGGGAATGGTGGCGGCGCACCTCG\n+GGCTGGCGGTGACGATTACCGGTATCGCCTTCAGCCAGAATTACAGCGTGGAGCGTGACGTGCGGATGCG\n+GGCGGGCGACAGCGTGACCATTCACGACTACCGCTTCACCTTCCGGGAGGTGCGGGACATCACCGGGCCC\n+AACTACCGCGGCGGGGTGGCCCTCATCGGGGTGACGCGCCACGGCGAGCCGGAGGCGGTGCTGCACGCGG\n+AGAAACGGCTCTACAACACCAGCCGGATGGTGATGACCGAGGCGGCGATTGACGGCGGGCTGACCCGCGA\n+CCTGTACGCCGCGCTCGGGGAGGAGCTGGACAACGGCGCGTGGGCCGTGCGCCTGTACTACAAACCGTTT\n+GTCCGCTGGATATGGGCCGGGGGGCTGCTGATGGCGCTGGGCGGGCTGCTGTGCCTGGCGGACCCGCGCT\n+ACCGCCGCCGTAAACCATTGCCGGAGGCCGGATGAAACGCAACGTACTGTTATTACCGCTGCTGATTTTT\n+CTGCTGATTGCCGCGGCGCTGCTGTGGCAGCTGGCGCGCAACGCGCAGGGGGATGACCCGACGAATCTCG\n+AATCGGCGCTGACCGGAAAGCCGGTGCCGGCGTTTCGCCTGGAATCGCTGGAGACGCCGGGTCAGTACTA\n+TCAGGCGGAGGTGCTGACGCAGGGGAAACCGGTGCTGCTTAACGTCTGGGCCACCTGGTGCCCGACCTGC\n+CGCGCCGAGCATCAGTACCTGAACCGGCTGGCCGCGCAGGGTATCCGGGTGGTGGGGCTGAACTATAAGG\n+ACGACCGGGCGAAGGCGGTGGCCTGGTTAAAGGAACTGGGCAACCCGTATGCGCTGAGTTTATCGGACAG\n+CGACGGGATGCTGGGGCTGGACCTGGGCGTGTACGGCGCGCCGGAAACCTTCCTCATCGACGGCAGGGGG\n+ATTATCCGCTACCGCCATGCGGGCGATTTGAATGCCCGGGTATGGGAAAGTGAACTGAAACCGCTGTGGG\n+ACAGATACAGCCGGGAGGCGGCGCAATGAGACTGTTACCGGGCATGGTGATGCTGATGCTGGCGCTGGTT\n+ATCTCCGGGTCAGCGCGGGCGACCACCGACGTGATGCCGTTTAAAGATGAAGCGCAGGAGCAGCAGTTCC\n+GCCAGCTCACGGAGCAGCTGCGCTGCCCGAAATGCCAGAACAACAGCATTGCGGACTCGAACGCGATGAT\n+AGCCACCGACATGCGCCGCAGGGTGTATGACCTGATGCAGGAGGGGAAGAGCCGCCAGGAAATCATCGAT\n+TACATGGTGGCGCGCTACGGCAACTTCGTCACCTACGACCCGCCGCTGACCCCGCTGACGGTGCTGCTGT\n+GGGTGCTGCCGCTGGCCGCCATCGTGGCGGGCGGGTGGATAATCGTCGCCCGGACGCGCCGGCGGGTGCG\n+CCTGCGCCGGGAGCCGCTGCCGGCGGACACCCCGGTTTGCGGCGCGCGCGCCGGGTGGGGCGTTTACGTG\n+CCGGGGGCCGTCATTGCGCTGGCGGTCGGCGCCGGCAGCTACGCCCTGACCGGCAGCTATCCGCAGGTGA\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/alleles-output-13-1101.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alleles-output-13-1101.json Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,1 @@\n+{"13-1101-Paratyphi_B.fasta": {"NZ_AOXE01000034.1_82": {"blast_result": {"is_perfect": true, "slen": 345713, "pident": 100.0, "marker": "NZ_AOXE01000034.1_82", "qseqid": "NZ_AOXE01000034.1_82|1542872411", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGCCAACCAGACCACCTTATCCGCGGGAAGCTTATATCGTCACCATTGAAAAAGGCACGCCGGGCCAGACGGTGACGTGGTATCAGCTACGGGCTGACCATCCGAAACCTGATTCGCTCATCAGCGAGCATCCGACCGCAGAAGAAGCGATGGATGCGAAAAAACGTTACGAAGATCCGGATAAATCATAG", "bitscore": 355.0, "sseq_msa_p_gaps": 0.0, "evalue": 6.989999999999998e-99, "gapopen": 0, "send": 244465, "has_perfect_match": true, "allele": 1542872411, "sstart": 244656, "allele_name": 1542872411, "start_idx": 244464.0, "qend": 192, "needs_revcomp": true, "is_extended": false, "mismatch": 0, "coverage": 1.0, "too_many_gaps": false, "end_idx": 244655.0, "trunc": false, "stitle": 10664711, "qlen": 192, "is_match": true, "length": 192}, "name": 1542872411, "seq": "ATGCCAACCAGACCACCTTATCCGCGGGAAGCTTATATCGTCACCATTGAAAAAGGCACGCCGGGCCAGACGGTGACGTGGTATCAGCTACGGGCTGACCATCCGAAACCTGATTCGCTCATCAGCGAGCATCCGACCGCAGAAGAAGCGATGGATGCGAAAAAACGTTACGAAGATCCGGATAAATCATAG"}, "NZ_AOXE01000016.1_8": {"blast_result": {"is_perfect": false, "slen": 277029, "pident": 99.344, "marker": "NZ_AOXE01000016.1_8", "qseqid": "NZ_AOXE01000016.1_8|2903172827", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "GTGGAAAAATATCTGCGTAGCGGTACCATGTTTGTGGTACTGGCATTTATATTATGGGGGTTAACACCTTTATATTATCAATATTTATCAGGGGGAAATCTCGCACAAATATTGATATACCGGGTGTTCTGGTCAATTCCATTATTACTGGCAGTAAGATTGTTATTTCGGCAGCGAACGCGATTTCATGATGCCTGGAAAGATAAAAAATCTTTTTTCTTCTGCATGATCGCCGGACTTTTGATGATTGTCTCATGGTCATCTTTTATTTATGCGCTAACCCATCATCTGGTACTTGATGCCAGCCTCGGCTATTTTATCAACCCGCTATTTGTTATTGCGCTGGGATGCATTTTTCTTAAAGAAAAACTGTCGTTGTTTCAGGCCATCGCCGTTTTTTCCGGTGTCTGCGGCCTGACTTTTCAAATTATCATGCTACGGCATTTCCCGGCGCTGGCGCTAACCATGGGATTATCATTTGCGCTATATGGCCTGGCGCGAAAGTTTATACATTATGATGTGATGACATCGATAACGATTGAAACATTATGGGCATTGCCTGTCTCACTATTAATTTTTCTCTTTAGCGATAGCGGACCAATTATATCCGCTAATACTCCTTTCTTTTTGTATGTCATGACGGCGCCAGTGACGATCATTCCACTGGTATTATTTGCCATCGCGTTAAATCACACCTCGTTGATTGTCACCGGACTGGCGCAATATATAGAGCCGTCGTTACAGTTCTTACTTGCCATTATGATATTTGGCGAACACATTAATTACGCAGAACTGCTCTGCTTTTGTGCGGTATGGTTCGGGTTGTTTCTATGCATATCTGAAAATTTATATTCCCATTATCTCCGCGCCCGTCTGAAACCGGTGTTCGGCAGGGTACAGCGCTTCTTTCGCTAA", "bitscore": 1657.0, "sseq_msa_p_gaps": 0.0, "evalue": 0.0, "gapopen": 0, "send": 53329, "has_perfect_match": false, "allele": 2903172827, "sstart": 54243, "allele_name": 717530117, "start_idx": 53328.0, "qend": 915, "needs_revcomp": true, "is_extended": false, "mismatch": 6, "coverage": 1.0, "too_many_gaps": false, "end_idx": 54242.0, "trunc": false, "stitle": 10664749, "qlen": 915, "is_match": true, "length": 915}, "name": 717530117, "seq": "GTGGAAAAATATCTGCGTAGCGGTACCATGTTTGTGGTACTGGCATTTATATTATGGGGGTTAACACCTTTATATTATCAATATTTATCAGGGGGAAATCTCGCACAAATATTGATATACCGGGTGTTCTGGTCAATTCCATTATTACTGGCAGTAAGATTGTTATTTCGGCAGCGAACGCGATTTCATGATGCCTGGAAAGATAAAAAATCTTTTTTCTTCTGCATGATCGCCGGACTTTTGATGATTGTCTCATGGTCATCTTTTATTTATGCGCTAACCCATCATCTGGTACTTGATGCCAGCCTCGGCTATTTTATCAACCCGCTATTTGTTATTGCGCTGGGATGCATTTTTCTTAAAGAAAAACTGTCGTTGTTTCAGGCCATCGCCGTTTTTTCCGGTGTCTGCGGCCTGACTTTTCAAATTATCATGCTACGGCATTTCCCGGCGCTGGCGCTAACCATGGGATTATCATTTGCGCTATATGGCCTGGCGCGAAAGTTTATACATTATGATGTGATGACATCGATAACGATTGAAACATTATGGGCATTGCCTGTCTCACTATTAATTTTTCTCTTTAGCGATAGCGGACCAATTATATCCGCTAATACTCCTTTCTTTTTGTATGTCATGACGGCGCCAGTGACGATCATTCCACTGGTATTATTTGCCATCGCGTTAAATCACACCTCGTTGATTGTCACCGGACTGGCGCAATATATAGAGCCGTCGTTACAGTTCTTACTTGCCATTATGATATTTGGCGAACACATTAATTACGCAGAACTGCTCTGCTTTTGTGCGGTATGGTTCGGGTTGTTTCTATGCATATCTGAAAATTTATATTCCCATTATCTCCGCGCCCGTCTGAAACCGGTGTTCGGCAGGGTACAGCGCTTCTTTCGCTAA"}, "NZ_AOXE01000061.1_3": {"blast_result": {"is_perfect": false, "slen": 146239, "pident": 99.14200000000001, "marker": "NZ_AOXE01000061.1_3", "qseqid": "NZ_AOXE01000061.1_3|1371632201", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGAATAAAATCCTGTTAGTTGATGATGACCGAGAGCTGACTTCCCTGTTAAAAGAGCTCCTCGAAATGGAAGGTTTTAATGTCCTGGTGGCC'..b'": 125460.0, "trunc": false, "stitle": 10664753, "qlen": 996, "is_match": true, "length": 996}, "name": 4181313675, "seq": "ATGAAAAAGAAAAGACCCGTACTTCAGGATGTGGCCGACCGTGTCGGCGTGACCAAAATGACGGTCAGCCGTTTTTTGCGTAATCCGGAGCAGGTCTCCGTCGCGCTGCGGGGTAAAATTGCGGCTGCGCTTGATGAGCTCGGGTACATTCCTAATCGCGCGCCTGACATTCTTTCCAACGCCACCAGCCGCGCCATTGGCGTTCTGCTGCCGTCTTTAACCAACCAGGTCTTTGCGGAAGTGTTACGCGGCATTGAGGCCGTCACCGATGCCCACGGGTATCAGACCATGCTGGCGCACTACGGCTATAAACCGGAGATGGAGCAGGAGCGCCTGGAATCGATGCTCTCCTGGAATATCGACGGCCTGATCCTCACTGAGCGTACCCATACGCCGCGCACCTTAAAAATGATCGAAGTCGCCGGGATTCCGGTGGTGGAACTGATGGACAGCCAGTCGCCGTGTCTCGATATTGCCGTCGGTTTTGATAACTTCGAGGCCGCCCGTCAGATGACCGCCGCGATTATCGCGCGTGGTCATCGTCATATCGCCTATCTGGGGGCGCGCCTCGACGAACGTACTATCATCAAGCAGAAGGGCTATGAACAGGCGATGCGGGACGCCGGCCTGGTTCCTTACAGTGTGATGATGGAGCAATCTTCATCCTACTCTTCCGGTATCGAACTCATGCGCCAGGCGCGACGTGAATACCCACAGCTTGACGGTATTTTTTGCACCAACGATGACCTGGCGGTGGGGGCGGCCTTCGAATGCCAGCGCCTGGGGCTAAAAATCCCGGACGACATGGCGATCGCCGGGTTCCACGGTCATGACATCGGCCAGGTGATGGAACCGCGTCTGGCAAGCGTCCTGACGCCGCGCGAGCGAATGGGCAGCATTGGCGCGGAGCGTCTGTTGGCCCGCATTCGCGGCGAAACGGTCACGCCGAAAATGTTAGATTTAGGTTTCACCTTGTCACCGGGCGGATCTATTTAG"}, "NZ_AOXE01000053.1_217": {"blast_result": {"is_perfect": false, "slen": 418912, "pident": 99.48200000000001, "marker": "NZ_AOXE01000053.1_217", "qseqid": "NZ_AOXE01000053.1_217|657736370", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGAATACTATCTGGATTGCCGTTGGCGCCCTGACCCTTCTGGGCCTGGTGTTCGGCGCCATTCTGGGTTATGCCTCCCGCCGATTCGCGGTTGAAGATGATCCGGTTGTTGAAAAAATCGATGCGATTTTGCCGCAGAGCCAGTGTGGTCAGTGCGGCTATCCCGGCTGCCGCCCCTACGCCGAAGCCGTGGGTCTCCAGGGCGAGAAAATCAATCGTTGCGCGCCCGGCGGCGAAGCCGTCATGCTGAAAATTGCAGAGTTGCTAAACGTAGAGCCGCAGCCATGCGATGGTGAAGAACAGCAGGCCGCGCCAGTACGTATGCTGGCAGTTATTGATGAAAATAACTGCATTGGCTGCACAAAATGCATTCAGGCCTGCCCGGTTGACGCCATCGTTGGCGCAACGCGCGCCATGCACACGGTCATGAGCGATCTCTGTACTGGCTGTAATCTGTGCGTCGATCCGTGTCCGACGCACTGCATCGAATTACGTCCGGTGAATGAGACGCCCGACAGTTGGAAATGGGATTTGAACACCATTCCCGTTCGCATCATTCCCGTGGAACAACATGCTTAA", "bitscore": 1053.0, "sseq_msa_p_gaps": 0.0, "evalue": 0.0, "gapopen": 0, "send": 208532, "has_perfect_match": false, "allele": 657736370, "sstart": 209110, "allele_name": 2392764491, "start_idx": 208531.0, "qend": 579, "needs_revcomp": true, "is_extended": false, "mismatch": 3, "coverage": 1.0, "too_many_gaps": false, "end_idx": 209109.0, "trunc": false, "stitle": 10664758, "qlen": 579, "is_match": true, "length": 579}, "name": 2392764491, "seq": "ATGAATACTATCTGGATTGCCGTTGGCGCCCTGACCCTTCTGGGCCTGGTGTTCGGCGCCATTCTGGGTTATGCCTCCCGCCGATTCGCGGTTGAAGATGATCCGGTTGTTGAAAAAATCGATGCGATTTTGCCGCAGAGCCAGTGTGGTCAGTGCGGCTATCCCGGCTGCCGCCCCTACGCCGAAGCCGTGGGTCTCCAGGGCGAGAAAATCAATCGTTGCGCGCCCGGCGGCGAAGCCGTCATGCTGAAAATTGCAGAGTTGCTAAACGTAGAGCCGCAGCCATGCGATGGTGAAGAACAGCAGGCCGCGCCAGTACGTATGCTGGCAGTTATTGATGAAAATAACTGCATTGGCTGCACAAAATGCATTCAGGCCTGCCCGGTTGACGCCATCGTTGGCGCAACGCGCGCCATGCACACGGTCATGAGCGATCTCTGTACTGGCTGTAATCTGTGCGTCGATCCGTGTCCGACGCACTGCATCGAATTACGTCCGGTGAATGAGACGCCCGACAGTTGGAAATGGGATTTGAACACCATTCCCGTTCGCATCATTCCCGTGGAACAACATGCTTAA"}, "NZ_AOYX01000060.1_42": {"blast_result": {"is_perfect": true, "slen": 110251, "pident": 100.0, "marker": "NZ_AOYX01000060.1_42", "qseqid": "NZ_AOYX01000060.1_42|486088087", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGCGTAAGCGCAAAGAAAAAGCAGTAAAGGTGCGGCAGTATGTAAATAGTAATGAGAACGACTATCAATTCGACGTTGTTTTGATATTATTATGCTCAGATTTTGTGATTTGCGTCCTGGAGATACAGAGTGGGTAA", "bitscore": 255.0, "sseq_msa_p_gaps": 0.0, "evalue": 5.029999999999999e-69, "gapopen": 0, "send": 44639, "has_perfect_match": true, "allele": 486088087, "sstart": 44776, "allele_name": 486088087, "start_idx": 44638.0, "qend": 138, "needs_revcomp": true, "is_extended": false, "mismatch": 0, "coverage": 1.0, "too_many_gaps": false, "end_idx": 44775.0, "trunc": false, "stitle": 10664743, "qlen": 138, "is_match": true, "length": 138}, "name": 486088087, "seq": "ATGCGTAAGCGCAAAGAAAAAGCAGTAAAGGTGCGGCAGTATGTAAATAGTAATGAGAACGACTATCAATTCGACGTTGTTTTGATATTATTATGCTCAGATTTTGTGATTTGCGTCCTGGAGATACAGAGTGGGTAA"}}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/alleles-output.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alleles-output.json Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,1 @@\n+{"AE014613-699860.fasta": {"NZ_AOXE01000059.1_338": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000017.1_96": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000059.1_370": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000021.1_61": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000034.1_164": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000016.1_8": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000011.1_85": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000036.1_31": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000036.1_39": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000011.1_83": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000031.1_80": {"blast_result": {"is_perfect": true, "slen": 699860, "pident": 100.0, "marker": "NZ_AOXE01000031.1_80", "qseqid": "NZ_AOXE01000031.1_80|2955003506", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGAGTAAATGCAGTGCTGATGAAACCCCGGTTTGCTGCTGTATGGACGTTGGAACCATCATGGACAACTCCGATTGCACCGCGTCATACAGCCGCGTATTCGCTACCCGCGCAGAGGCTGAAGAGACGCTGGCGGCGTTAACCGAAAAAGCGCGTAGCGTGGAGTCTGAACCTTGCCAAATTACGCCAACCTTTACCGAGGAATCCGAAGGCGTTCGTCTGGATATTGATTTTGTTTTCGCCTGCGAAGCAGAAACGCTGATCTTCCAGCTCGGCCTGCGTTAA", "bitscore": 527, "sseq_msa_p_gaps": 0.0, "evalue": 3.18e-151, "gapopen": 0, "send": 548003, "has_perfect_match": true, "allele": 2955003506, "sstart": 547719, "allele_name": 2955003506, "start_idx": 547718.0, "qend": 285, "needs_revcomp": false, "is_extended": false, "mismatch": 0, "coverage": 1.0, "too_many_gaps": false, "end_idx": 548002.0, "trunc": false, "stitle": "gi|29140506|gb|AE014613.1| Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome", "qlen": 285, "is_match": true, "length": 285}, "name": 2955003506, "seq": "ATGAGTAAATGCAGTGCTGATGAAACCCCGGTTTGCTGCTGTATGGACGTTGGAACCATCATGGACAACTCCGATTGCACCGCGTCATACAGCCGCGTATTCGCTACCCGCGCAGAGGCTGAAGAGACGCTGGCGGCGTTAACCGAAAAAGCGCGTAGCGTGGAGTCTGAACCTTGCCAAATTACGCCAACCTTTACCGAGGAATCCGAAGGCGTTCGTCTGGATATTGATTTTGTTTTCGCCTGCGAAGCAGAAACGCTGATCTTCCAGCTCGGCCTGCGTTAA"}, "NZ_AOXE01000064.1_27": {"seq": null, "name": null, "blast_result": null}, "NZ_AOYX01000075.1_47": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000064.1_26": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000073.1_48": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000064.1_36": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000059.1_395": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_272": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_124": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000068.1_45": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_105": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000085.1_60": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000053.1_173": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000004.1_154": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_103": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000021.1_165": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000034.1_134": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000083.1_53": {"blast_result": {"is_perfect": false, "slen": 699860, "pident": 99.56700000000001, "marker": "NZ_AOXE01000083.1_53", "qseqid": "NZ_AOXE01000083.1_53|1998597791", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGATTGACTGGCAGGATCTCCACCATAGCGAACTTACTGTTCCTCAACTTTACGCGTTACTCAAATTGCGCTGCGCGGTCTTTGTGGTAGAACAGCGCTGTCCTTACCTGGATGTGGATGGCGATGATTTAGTCGGCGACAATCGGCATATTCTGGGCTGGCATCAGGATGAGCTGGTGGCGTATGCGAGGATTCTGAAAAGTGATAACGAATCAGACCCGGTTGTCATTGGCCGGGTGATCGTCAGCGATGCCTGGCGAGGGGCGAAACTGGGCCAGCAATTAATGGCGAAAACGCTGGAATCTTGTGGGCGGCACTGGCCGGATAAGCCGCTATATCTGGGGGCGCAGGCGCATCTGCAACCTTTTTATGCGCGGTTTGGTTTT'..b'rt": 633122, "allele_name": 2372254687, "start_idx": 633121.0, "qend": 963, "needs_revcomp": false, "is_extended": false, "mismatch": 3, "coverage": 1.0, "too_many_gaps": false, "end_idx": 634083.0, "trunc": false, "stitle": "gi|29140506|gb|AE014613.1| Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome", "qlen": 963, "is_match": true, "length": 963}, "name": 2372254687, "seq": "ATGCGTAGCGCGCAGGTATACCGCTGGCAGATCCCCATGGACGCGGGGGTGGTTCTGCGCGACAGGCGGTTAAAAACTCGCGACGGGCTGTATGTTTGTCTGCGTGACGGCGAGCGTGAAGGGTGGGGAGAGATCTCCCCGCTGCCGGGCTTCAGTCAGGAAACGTGGGAAGAGGCGCAGACGGCGCTCCTGACATGGGTGAATGACTGGCTTCAGGGGAGCGAGGGATTACCGGAGATGCCTTCGGTCGCGTTTGGCGCAAGCTGCGCGCTGGCGGAACTGACTGGCGTCTTGCCGGAGGCGGCGGACTATCGCGCCGCGCCGTTATGCACTGGCGATCCTGACGATTTGGTACTGCGGCTTGCCGATATGCCCGGCGAGAAAATCGCTAAGGTCAAAGTGGGTCTCTATGAAGCGGTACGCGACGGCATGGTGGTTAATTTGCTGCTGGAGGCGATCCCGGATCTGCATCTGCGTCTGGATGCGAATCGCGCCTGGACGCCGCTAAAAGCCCAACAGTTCGCAAAGTATGTTAATCCGGATTACCGCGCTCGTATCGCCTTTCTCGAAGAACCGTGTAAGACGCGGGATGATTCCCGCGCCTTTGCCCGTGAAACCGGCATCGCGATTGCCTGGGACGAAAGTCTGCGCGAAGCGGATTTCACCTTTGAAGCCGAAGAGGGCGTCAGGGCTGTGGTTATCAAACCTACGCTGACCGGATCGCTTGATAAAGTGCGTGAGCAAGTCGCTGCCGCCCATGCGTTGGGACTGACGGCGGTCATCAGCTCTTCGATCGAGTCCAGCCTCGGCCTGACGCAACTGGCGCGGATTGCCGCCTGGTTGACGCCGGGAACGCTGCCCGGACTGGATACCTTGCATCTGATGCAGGCGCAACAGGTTCGCCCCTGGCCTGGTAACGCGTTGCCTTGTCTGAAGCGTGATGAGCTGGAACGACTGTTATGA"}, "NZ_AOXE01000083.1_45": {"blast_result": {"is_perfect": true, "slen": 699860, "pident": 100.0, "marker": "NZ_AOXE01000083.1_45", "qseqid": "NZ_AOXE01000083.1_45|1328452594", "qstart": 1, "is_trunc": false, "sseq_msa_gaps": 0, "sseq": "ATGGAATGGTTGGTTAAGAAATCGCATTATGTCAAAAAGAGGGCGTGCCATGTTCTGGTGCTGTGCGATAGCGGCGGTTCGCTAAAAATGATCGCCGAGGCGAATTCCATGATATTACTGAGTCCCGGCGATATCCTGTCGCCTTTACAGGATGCGCAGTATTGTATTAATCGGGAAAAACACCAGACCTTAAAAATCGTTGATGCACGCTGTTATTCCTGCGACGAATGGCAGCGGTTGACGCGCAAGCCATTATGA", "bitscore": 477, "sseq_msa_p_gaps": 0.0, "evalue": 2.919999999999999e-136, "gapopen": 0, "send": 635803, "has_perfect_match": true, "allele": 1328452594, "sstart": 635546, "allele_name": 1328452594, "start_idx": 635545.0, "qend": 258, "needs_revcomp": false, "is_extended": false, "mismatch": 0, "coverage": 1.0, "too_many_gaps": false, "end_idx": 635802.0, "trunc": false, "stitle": "gi|29140506|gb|AE014613.1| Salmonella enterica subsp. enterica serovar Typhi Ty2, complete genome", "qlen": 258, "is_match": true, "length": 258}, "name": 1328452594, "seq": "ATGGAATGGTTGGTTAAGAAATCGCATTATGTCAAAAAGAGGGCGTGCCATGTTCTGGTGCTGTGCGATAGCGGCGGTTCGCTAAAAATGATCGCCGAGGCGAATTCCATGATATTACTGAGTCCCGGCGATATCCTGTCGCCTTTACAGGATGCGCAGTATTGTATTAATCGGGAAAAACACCAGACCTTAAAAATCGTTGATGCACGCTGTTATTCCTGCGACGAATGGCAGCGGTTGACGCGCAAGCCATTATGA"}, "NZ_AOXE01000081.1_210": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_211": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_212": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000011.1_77": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000007.1_20": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_215": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000085.1_58": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000072.1_73": {"seq": null, "name": null, "blast_result": null}, "NZ_APAO01000014.1_55": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_200": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_87": {"seq": null, "name": null, "blast_result": null}, "NZ_AOYX01000031.1_11": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000059.1_363": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000081.1_283": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000004.1_101": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000008.1_59": {"seq": null, "name": null, "blast_result": null}, "NZ_AOXE01000053.1_113": {"seq": null, "name": null, "blast_result": null}}}\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/cgmlst-profiles-13-1101.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cgmlst-profiles-13-1101.csv Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,2 @@\n+,NC_003198.1_3005,NC_006905.1_2841,NC_011149.1_467,NC_017623.1_3300,NZ_ABFH02000002.1_1303,NZ_AOXE01000003.1_37,NZ_AOXE01000003.1_39,NZ_AOXE01000003.1_57,NZ_AOXE01000003.1_7,NZ_AOXE01000003.1_70,NZ_AOXE01000004.1_10,NZ_AOXE01000004.1_101,NZ_AOXE01000004.1_12,NZ_AOXE01000004.1_134,NZ_AOXE01000004.1_135,NZ_AOXE01000004.1_14,NZ_AOXE01000004.1_140,NZ_AOXE01000004.1_154,NZ_AOXE01000004.1_35,NZ_AOXE01000004.1_36,NZ_AOXE01000004.1_39,NZ_AOXE01000004.1_59,NZ_AOXE01000004.1_68,NZ_AOXE01000004.1_74,NZ_AOXE01000004.1_87,NZ_AOXE01000007.1_13,NZ_AOXE01000007.1_18,NZ_AOXE01000007.1_20,NZ_AOXE01000007.1_48,NZ_AOXE01000008.1_59,NZ_AOXE01000008.1_63,NZ_AOXE01000009.1_17,NZ_AOXE01000011.1_101,NZ_AOXE01000011.1_77,NZ_AOXE01000011.1_82,NZ_AOXE01000011.1_83,NZ_AOXE01000011.1_85,NZ_AOXE01000016.1_13,NZ_AOXE01000016.1_8,NZ_AOXE01000017.1_117,NZ_AOXE01000017.1_118,NZ_AOXE01000017.1_130,NZ_AOXE01000017.1_4,NZ_AOXE01000017.1_40,NZ_AOXE01000017.1_43,NZ_AOXE01000017.1_54,NZ_AOXE01000017.1_59,NZ_AOXE01000017.1_80,NZ_AOXE01000017.1_82,NZ_AOXE01000017.1_96,NZ_AOXE01000019.1_13,NZ_AOXE01000019.1_14,NZ_AOXE01000019.1_24,NZ_AOXE01000021.1_10,NZ_AOXE01000021.1_11,NZ_AOXE01000021.1_165,NZ_AOXE01000021.1_29,NZ_AOXE01000021.1_38,NZ_AOXE01000021.1_49,NZ_AOXE01000021.1_6,NZ_AOXE01000021.1_61,NZ_AOXE01000021.1_79,NZ_AOXE01000023.1_11,NZ_AOXE01000023.1_25,NZ_AOXE01000023.1_30,NZ_AOXE01000024.1_3,NZ_AOXE01000024.1_35,NZ_AOXE01000024.1_38,NZ_AOXE01000025.1_13,NZ_AOXE01000025.1_14,NZ_AOXE01000025.1_20,NZ_AOXE01000031.1_102,NZ_AOXE01000031.1_106,NZ_AOXE01000031.1_70,NZ_AOXE01000031.1_80,NZ_AOXE01000033.1_11,NZ_AOXE01000033.1_12,NZ_AOXE01000033.1_14,NZ_AOXE01000033.1_17,NZ_AOXE01000033.1_19,NZ_AOXE01000033.1_2,NZ_AOXE01000033.1_21,NZ_AOXE01000033.1_26,NZ_AOXE01000033.1_3,NZ_AOXE01000033.1_30,NZ_AOXE01000033.1_34,NZ_AOXE01000033.1_38,NZ_AOXE01000033.1_43,NZ_AOXE01000033.1_51,NZ_AOXE01000034.1_103,NZ_AOXE01000034.1_106,NZ_AOXE01000034.1_111,NZ_AOXE01000034.1_112,NZ_AOXE01000034.1_113,NZ_AOXE01000034.1_119,NZ_AOXE01000034.1_126,NZ_AOXE01000034.1_127,NZ_AOXE01000034.1_133,NZ_AOXE01000034.1_134,NZ_AOXE01000034.1_164,NZ_AOXE01000034.1_173,NZ_AOXE01000034.1_53,NZ_AOXE01000034.1_82,NZ_AOXE01000035.1_13,NZ_AOXE01000035.1_21,NZ_AOXE01000036.1_108,NZ_AOXE01000036.1_116,NZ_AOXE01000036.1_15,NZ_AOXE01000036.1_157,NZ_AOXE01000036.1_16,NZ_AOXE01000036.1_2,NZ_AOXE01000036.1_3,NZ_AOXE01000036.1_31,NZ_AOXE01000036.1_39,NZ_AOXE01000036.1_43,NZ_AOXE01000036.1_58,NZ_AOXE01000036.1_66,NZ_AOXE01000036.1_98,NZ_AOXE01000040.1_19,NZ_AOXE01000040.1_28,NZ_AOXE01000040.1_31,NZ_AOXE01000041.1_33,NZ_AOXE01000041.1_73,NZ_AOXE01000041.1_75,NZ_AOXE01000041.1_76,NZ_AOXE01000041.1_84,NZ_AOXE01000041.1_85,NZ_AOXE01000041.1_87,NZ_AOXE01000043.1_4,NZ_AOXE01000047.1_56,NZ_AOXE01000047.1_57,NZ_AOXE01000050.1_18,NZ_AOXE01000050.1_44,NZ_AOXE01000052.1_115,NZ_AOXE01000052.1_128,NZ_AOXE01000052.1_131,NZ_AOXE01000052.1_137,NZ_AOXE01000052.1_141,NZ_AOXE01000052.1_23,NZ_AOXE01000052.1_36,NZ_AOXE01000052.1_38,NZ_AOXE01000052.1_41,NZ_AOXE01000052.1_43,NZ_AOXE01000052.1_78,NZ_AOXE01000052.1_92,NZ_AOXE01000053.1_113,NZ_AOXE01000053.1_128,NZ_AOXE01000053.1_130,NZ_AOXE01000053.1_166,NZ_AOXE01000053.1_173,NZ_AOXE01000053.1_180,NZ_AOXE01000053.1_190,NZ_AOXE01000053.1_217,NZ_AOXE01000053.1_86,NZ_AOXE01000059.1_11,NZ_AOXE01000059.1_129,NZ_AOXE01000059.1_133,NZ_AOXE01000059.1_15,NZ_AOXE01000059.1_174,NZ_AOXE01000059.1_182,NZ_AOXE01000059.1_184,NZ_AOXE01000059.1_189,NZ_AOXE01000059.1_229,NZ_AOXE01000059.1_31,NZ_AOXE01000059.1_32,NZ_AOXE01000059.1_325,NZ_AOXE01000059.1_328,NZ_AOXE01000059.1_333,NZ_AOXE01000059.1_335,NZ_AOXE01000059.1_336,NZ_AOXE01000059.1_338,NZ_AOXE01000059.1_35,NZ_AOXE01000059.1_353,NZ_AOXE01000059.1_363,NZ_AOXE01000059.1_37,NZ_AOXE01000059.1_370,NZ_AOXE01000059.1_372,NZ_AOXE01000059.1_38,NZ_AOXE01000059.1_395,NZ_AOXE01000059.1_396,NZ_AOXE01000059.1_408,NZ_AOXE01000059.1_411,NZ_AOXE01000059.1_418,NZ_AOXE01000059.1_42,NZ_AOXE01000059.1_427,NZ_AOXE01000059.1_430,NZ_AOXE01000059.1_433,NZ_AOXE0100005'..b'0085.1_34,NZ_AOXE01000085.1_57,NZ_AOXE01000085.1_58,NZ_AOXE01000085.1_60,NZ_AOXE01000085.1_62,NZ_AOXE01000085.1_63,NZ_AOXE01000085.1_65,NZ_AOXI01000002.1_306,NZ_AOXI01000005.1_72,NZ_AOXI01000016.1_73,NZ_AOYI01000008.1_9,NZ_AOYL01000006.1_89,NZ_AOYO01000084.1_456,NZ_AOYX01000009.1_43,NZ_AOYX01000031.1_11,NZ_AOYX01000060.1_42,NZ_AOYX01000075.1_47,NZ_AOYX01000092.1_135,NZ_APAO01000014.1_55,NZ_AYDA01000043.1_275,NZ_CM001471.1_3941\n+13-1101-Paratyphi_B.fasta,3750733112,2577401250,161888011,212691877,4104237653,600123782,2702249523,1721939526,3200918252,1829936019,1268872669,4181313675,2932852483,2488907409,3205748274,2755012932,16813142,212755137,3536700674,2393566038,185769493,1803383866,2748382047,1140249760,118649377,2980577262,3237112777,706931044,3826354763,2106993987,1303226364,3118988453,1557621358,2176838220,3478111452,958144191,2645401897,2663521059,717530117,1358177531,2367459520,3044853482,2006834477,2864140755,945900486,3756506314,3661407416,827295360,2136794984,3389985419,3028643217,2087509483,2983916931,3219814449,4276130360,3574020847,2077249876,2077738650,1430683295,2795082944,2904170129,1830292553,3712766732,1898052981,246770399,2246059591,282105442,3473113586,4252575321,299233641,2651864740,1436693988,2545200385,225275747,1320460320,2353669245,1780432149,1667064808,4127891750,2307413670,1994382719,1324934990,912019826,767517378,300758334,2732360708,1476689272,316910970,726132721,2404196596,628816999,715723428,439814616,3868995612,3923470646,922813689,596464946,2068726690,514528221,1235021963,1738069373,2194946451,1542872411,4178717449,2356733911,3699174113,2832724106,3646752757,2770658758,431631459,3155517162,3250676045,1781281347,747567264,550964520,528121872,308073351,4121689972,593462775,3179271034,399278785,498374695,1439849948,1080346469,2094006958,551814723,708436169,828533582,2587507857,2087509483,3578386263,3641973773,3721979689,119594086,2079191084,1524097055,4237193625,2084418558,2598101487,2243362473,132779259,2539563995,3147004960,1005413801,398274060,683371912,2311744161,2098379863,830225365,4090514893,3192866010,489837497,2392764491,4177019046,616151434,195979449,2688075180,571430577,3183061957,2253874773,1342053785,162097627,1109320307,664703088,4249405940,4228448746,791036150,629769704,3428477437,1226961299,21264588,2569933901,2113055247,1932373744,2467894309,3206518183,3822365505,2170727246,2979506855,3572598511,257452301,3674258444,986292687,3438653864,3027329430,4212855923,2590187629,2536338461,2306804434,3643876638,2409457196,3781645063,2087509483,2424781734,1291136085,1047370225,698589548,2519701742,3276128843,3548183620,4289919471,1965001593,1842785664,3995669301,2390483285,3371174263,1245009874,2919271763,1854288580,3832041954,2672990925,2087509483,3643634927,333081518,2139774773,121039594,4267848688,1426386553,93691856,2617592815,2061334442,484067521,338226340,595233173,1246387686,1236556171,3165457710,649192555,3831614515,2226007812,2615307626,1822361792,1251756376,2953514645,226121950,3523430840,799742746,4250299083,1635467073,563085019,4033110422,2734837303,1982008089,2061617626,2494351896,3172400135,309774068,2754698655,2253744431,2081649483,3386486144,4213771231,352954861,216514641,744347523,2758229229,1743327588,406171140,1043329926,294538384,2505247376,4189957668,2899049638,2078682862,4109265911,2639318025,4269576185,3445065778,1716630297,3912522894,10121991,3254127745,3047220128,4248599762,353328767,1019572313,3239852751,2459342739,1547676029,2844497268,2203341188,4282035484,3699551722,121975481,3289785650,1171029009,4054067887,3552349156,3344082709,3523966715,3187079670,1371090518,2289872656,4200440493,3463404426,822043034,637470977,1500184554,3978410193,3572777440,2763485497,2649675872,625657224,2641882719,1619775958,1484137762,1613239859,3150901989,1366744981,504704912,2712433588,2087509483,3887842238,1839157263,3017501787,2881079752,3306151290,3554479089,128939352,1992222443,491087478,1881515649,1671956993,938581157,486088087,2151397774,2972709081,3276924,4149206546,4185944080\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/cgmlst-profiles.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cgmlst-profiles.csv Wed Mar 01 12:35:39 2017 -0500
b
@@ -0,0 +1,2 @@
+,NC_003198.1_3005,NC_006905.1_2841,NC_011149.1_467,NC_017623.1_3300,NZ_ABFH02000002.1_1303,NZ_AOXE01000003.1_37,NZ_AOXE01000003.1_39,NZ_AOXE01000003.1_57,NZ_AOXE01000003.1_7,NZ_AOXE01000003.1_70,NZ_AOXE01000004.1_10,NZ_AOXE01000004.1_101,NZ_AOXE01000004.1_12,NZ_AOXE01000004.1_134,NZ_AOXE01000004.1_135,NZ_AOXE01000004.1_14,NZ_AOXE01000004.1_140,NZ_AOXE01000004.1_154,NZ_AOXE01000004.1_35,NZ_AOXE01000004.1_36,NZ_AOXE01000004.1_39,NZ_AOXE01000004.1_59,NZ_AOXE01000004.1_68,NZ_AOXE01000004.1_74,NZ_AOXE01000004.1_87,NZ_AOXE01000007.1_13,NZ_AOXE01000007.1_18,NZ_AOXE01000007.1_20,NZ_AOXE01000007.1_48,NZ_AOXE01000008.1_59,NZ_AOXE01000008.1_63,NZ_AOXE01000009.1_17,NZ_AOXE01000011.1_101,NZ_AOXE01000011.1_77,NZ_AOXE01000011.1_82,NZ_AOXE01000011.1_83,NZ_AOXE01000011.1_85,NZ_AOXE01000016.1_13,NZ_AOXE01000016.1_8,NZ_AOXE01000017.1_117,NZ_AOXE01000017.1_118,NZ_AOXE01000017.1_130,NZ_AOXE01000017.1_4,NZ_AOXE01000017.1_40,NZ_AOXE01000017.1_43,NZ_AOXE01000017.1_54,NZ_AOXE01000017.1_59,NZ_AOXE01000017.1_80,NZ_AOXE01000017.1_82,NZ_AOXE01000017.1_96,NZ_AOXE01000019.1_13,NZ_AOXE01000019.1_14,NZ_AOXE01000019.1_24,NZ_AOXE01000021.1_10,NZ_AOXE01000021.1_11,NZ_AOXE01000021.1_165,NZ_AOXE01000021.1_29,NZ_AOXE01000021.1_38,NZ_AOXE01000021.1_49,NZ_AOXE01000021.1_6,NZ_AOXE01000021.1_61,NZ_AOXE01000021.1_79,NZ_AOXE01000023.1_11,NZ_AOXE01000023.1_25,NZ_AOXE01000023.1_30,NZ_AOXE01000024.1_3,NZ_AOXE01000024.1_35,NZ_AOXE01000024.1_38,NZ_AOXE01000025.1_13,NZ_AOXE01000025.1_14,NZ_AOXE01000025.1_20,NZ_AOXE01000031.1_102,NZ_AOXE01000031.1_106,NZ_AOXE01000031.1_70,NZ_AOXE01000031.1_80,NZ_AOXE01000033.1_11,NZ_AOXE01000033.1_12,NZ_AOXE01000033.1_14,NZ_AOXE01000033.1_17,NZ_AOXE01000033.1_19,NZ_AOXE01000033.1_2,NZ_AOXE01000033.1_21,NZ_AOXE01000033.1_26,NZ_AOXE01000033.1_3,NZ_AOXE01000033.1_30,NZ_AOXE01000033.1_34,NZ_AOXE01000033.1_38,NZ_AOXE01000033.1_43,NZ_AOXE01000033.1_51,NZ_AOXE01000034.1_103,NZ_AOXE01000034.1_106,NZ_AOXE01000034.1_111,NZ_AOXE01000034.1_112,NZ_AOXE01000034.1_113,NZ_AOXE01000034.1_119,NZ_AOXE01000034.1_126,NZ_AOXE01000034.1_127,NZ_AOXE01000034.1_133,NZ_AOXE01000034.1_134,NZ_AOXE01000034.1_164,NZ_AOXE01000034.1_173,NZ_AOXE01000034.1_53,NZ_AOXE01000034.1_82,NZ_AOXE01000035.1_13,NZ_AOXE01000035.1_21,NZ_AOXE01000036.1_108,NZ_AOXE01000036.1_116,NZ_AOXE01000036.1_15,NZ_AOXE01000036.1_157,NZ_AOXE01000036.1_16,NZ_AOXE01000036.1_2,NZ_AOXE01000036.1_3,NZ_AOXE01000036.1_31,NZ_AOXE01000036.1_39,NZ_AOXE01000036.1_43,NZ_AOXE01000036.1_58,NZ_AOXE01000036.1_66,NZ_AOXE01000036.1_98,NZ_AOXE01000040.1_19,NZ_AOXE01000040.1_28,NZ_AOXE01000040.1_31,NZ_AOXE01000041.1_33,NZ_AOXE01000041.1_73,NZ_AOXE01000041.1_75,NZ_AOXE01000041.1_76,NZ_AOXE01000041.1_84,NZ_AOXE01000041.1_85,NZ_AOXE01000041.1_87,NZ_AOXE01000043.1_4,NZ_AOXE01000047.1_56,NZ_AOXE01000047.1_57,NZ_AOXE01000050.1_18,NZ_AOXE01000050.1_44,NZ_AOXE01000052.1_115,NZ_AOXE01000052.1_128,NZ_AOXE01000052.1_131,NZ_AOXE01000052.1_137,NZ_AOXE01000052.1_141,NZ_AOXE01000052.1_23,NZ_AOXE01000052.1_36,NZ_AOXE01000052.1_38,NZ_AOXE01000052.1_41,NZ_AOXE01000052.1_43,NZ_AOXE01000052.1_78,NZ_AOXE01000052.1_92,NZ_AOXE01000053.1_113,NZ_AOXE01000053.1_128,NZ_AOXE01000053.1_130,NZ_AOXE01000053.1_166,NZ_AOXE01000053.1_173,NZ_AOXE01000053.1_180,NZ_AOXE01000053.1_190,NZ_AOXE01000053.1_217,NZ_AOXE01000053.1_86,NZ_AOXE01000059.1_11,NZ_AOXE01000059.1_129,NZ_AOXE01000059.1_133,NZ_AOXE01000059.1_15,NZ_AOXE01000059.1_174,NZ_AOXE01000059.1_182,NZ_AOXE01000059.1_184,NZ_AOXE01000059.1_189,NZ_AOXE01000059.1_229,NZ_AOXE01000059.1_31,NZ_AOXE01000059.1_32,NZ_AOXE01000059.1_325,NZ_AOXE01000059.1_328,NZ_AOXE01000059.1_333,NZ_AOXE01000059.1_335,NZ_AOXE01000059.1_336,NZ_AOXE01000059.1_338,NZ_AOXE01000059.1_35,NZ_AOXE01000059.1_353,NZ_AOXE01000059.1_363,NZ_AOXE01000059.1_37,NZ_AOXE01000059.1_370,NZ_AOXE01000059.1_372,NZ_AOXE01000059.1_38,NZ_AOXE01000059.1_395,NZ_AOXE01000059.1_396,NZ_AOXE01000059.1_408,NZ_AOXE01000059.1_411,NZ_AOXE01000059.1_418,NZ_AOXE01000059.1_42,NZ_AOXE01000059.1_427,NZ_AOXE01000059.1_430,NZ_AOXE01000059.1_433,NZ_AOXE01000059.1_435,NZ_AOXE01000059.1_437,NZ_AOXE01000059.1_440,NZ_AOXE01000059.1_442,NZ_AOXE01000059.1_49,NZ_AOXE01000059.1_60,NZ_AOXE01000059.1_66,NZ_AOXE01000059.1_67,NZ_AOXE01000059.1_68,NZ_AOXE01000059.1_69,NZ_AOXE01000059.1_72,NZ_AOXE01000059.1_79,NZ_AOXE01000059.1_9,NZ_AOXE01000059.1_94,NZ_AOXE01000061.1_12,NZ_AOXE01000061.1_20,NZ_AOXE01000061.1_22,NZ_AOXE01000061.1_3,NZ_AOXE01000064.1_26,NZ_AOXE01000064.1_27,NZ_AOXE01000064.1_36,NZ_AOXE01000068.1_19,NZ_AOXE01000068.1_20,NZ_AOXE01000068.1_27,NZ_AOXE01000068.1_29,NZ_AOXE01000068.1_37,NZ_AOXE01000068.1_38,NZ_AOXE01000068.1_45,NZ_AOXE01000068.1_46,NZ_AOXE01000068.1_5,NZ_AOXE01000068.1_52,NZ_AOXE01000068.1_58,NZ_AOXE01000068.1_65,NZ_AOXE01000068.1_67,NZ_AOXE01000068.1_70,NZ_AOXE01000068.1_72,NZ_AOXE01000068.1_76,NZ_AOXE01000072.1_100,NZ_AOXE01000072.1_104,NZ_AOXE01000072.1_12,NZ_AOXE01000072.1_13,NZ_AOXE01000072.1_3,NZ_AOXE01000072.1_41,NZ_AOXE01000072.1_42,NZ_AOXE01000072.1_60,NZ_AOXE01000072.1_65,NZ_AOXE01000072.1_73,NZ_AOXE01000072.1_8,NZ_AOXE01000072.1_82,NZ_AOXE01000072.1_83,NZ_AOXE01000072.1_86,NZ_AOXE01000072.1_93,NZ_AOXE01000073.1_11,NZ_AOXE01000073.1_130,NZ_AOXE01000073.1_144,NZ_AOXE01000073.1_15,NZ_AOXE01000073.1_19,NZ_AOXE01000073.1_48,NZ_AOXE01000073.1_79,NZ_AOXE01000073.1_85,NZ_AOXE01000073.1_98,NZ_AOXE01000077.1_25,NZ_AOXE01000077.1_28,NZ_AOXE01000077.1_29,NZ_AOXE01000077.1_33,NZ_AOXE01000077.1_35,NZ_AOXE01000079.1_15,NZ_AOXE01000079.1_4,NZ_AOXE01000080.1_12,NZ_AOXE01000080.1_13,NZ_AOXE01000080.1_20,NZ_AOXE01000081.1_103,NZ_AOXE01000081.1_105,NZ_AOXE01000081.1_124,NZ_AOXE01000081.1_136,NZ_AOXE01000081.1_179,NZ_AOXE01000081.1_186,NZ_AOXE01000081.1_190,NZ_AOXE01000081.1_193,NZ_AOXE01000081.1_195,NZ_AOXE01000081.1_200,NZ_AOXE01000081.1_201,NZ_AOXE01000081.1_209,NZ_AOXE01000081.1_210,NZ_AOXE01000081.1_211,NZ_AOXE01000081.1_212,NZ_AOXE01000081.1_214,NZ_AOXE01000081.1_215,NZ_AOXE01000081.1_220,NZ_AOXE01000081.1_223,NZ_AOXE01000081.1_249,NZ_AOXE01000081.1_251,NZ_AOXE01000081.1_262,NZ_AOXE01000081.1_264,NZ_AOXE01000081.1_267,NZ_AOXE01000081.1_272,NZ_AOXE01000081.1_282,NZ_AOXE01000081.1_283,NZ_AOXE01000081.1_286,NZ_AOXE01000081.1_294,NZ_AOXE01000081.1_40,NZ_AOXE01000081.1_48,NZ_AOXE01000081.1_49,NZ_AOXE01000081.1_52,NZ_AOXE01000081.1_55,NZ_AOXE01000081.1_59,NZ_AOXE01000081.1_62,NZ_AOXE01000081.1_64,NZ_AOXE01000081.1_76,NZ_AOXE01000081.1_79,NZ_AOXE01000081.1_83,NZ_AOXE01000081.1_87,NZ_AOXE01000081.1_92,NZ_AOXE01000081.1_97,NZ_AOXE01000083.1_45,NZ_AOXE01000083.1_47,NZ_AOXE01000083.1_53,NZ_AOXE01000083.1_74,NZ_AOXE01000083.1_86,NZ_AOXE01000085.1_10,NZ_AOXE01000085.1_17,NZ_AOXE01000085.1_20,NZ_AOXE01000085.1_34,NZ_AOXE01000085.1_57,NZ_AOXE01000085.1_58,NZ_AOXE01000085.1_60,NZ_AOXE01000085.1_62,NZ_AOXE01000085.1_63,NZ_AOXE01000085.1_65,NZ_AOXI01000002.1_306,NZ_AOXI01000005.1_72,NZ_AOXI01000016.1_73,NZ_AOYI01000008.1_9,NZ_AOYL01000006.1_89,NZ_AOYO01000084.1_456,NZ_AOYX01000009.1_43,NZ_AOYX01000031.1_11,NZ_AOYX01000060.1_42,NZ_AOYX01000075.1_47,NZ_AOYX01000092.1_135,NZ_APAO01000014.1_55,NZ_AYDA01000043.1_275,NZ_CM001471.1_3941
+AE014613-699860.fasta,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3969539340,2545200385,225275747,2955003506,2353669245,2666669453,1672513023,3779563470,1301843222,2161147266,607954140,3680021500,2914087704,1062106200,3673111880,1314942441,1367997025,3293595301,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1528212814,2110459436,4160823845,1648892875,2084418558,1638162324,469721942,1317894045,1973458150,926214622,2197498164,398274060,,,,,,,,,,1123870984,278162969,490843778,3950769715,,,,,,4203409135,3569491948,,,,,,,1052128508,,,1510445340,,,4065472468,,,,,,1495737522,,,,,,,,3076491138,712233770,3105746335,625241463,3016847250,1928860657,2229984332,1341416065,2978539204,1175502179,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1328452594,2372254687,2640609716,3051501604,3258707132,,,,,,,,,,,,1007978530,,2019769394,1109678443,,,,,,,,,
b
diff -r 000000000000 -r ebee10be4297 test-data/novel-alleles-13-1101.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/novel-alleles-13-1101.fasta Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,660 @@\n+>NZ_AOXE01000034.1_82|1542872411\n+ATGCCAACCAGACCACCTTATCCGCGGGAAGCTTATATCGTCACCATTGAAAAAGGCACGCCGGGCCAGACGGTGACGTGGTATCAGCTACGGGCTGACCATCCGAAACCTGATTCGCTCATCAGCGAGCATCCGACCGCAGAAGAAGCGATGGATGCGAAAAAACGTTACGAAGATCCGGATAAATCATAG\n+>NZ_AOXE01000016.1_8|717530117\n+GTGGAAAAATATCTGCGTAGCGGTACCATGTTTGTGGTACTGGCATTTATATTATGGGGGTTAACACCTTTATATTATCAATATTTATCAGGGGGAAATCTCGCACAAATATTGATATACCGGGTGTTCTGGTCAATTCCATTATTACTGGCAGTAAGATTGTTATTTCGGCAGCGAACGCGATTTCATGATGCCTGGAAAGATAAAAAATCTTTTTTCTTCTGCATGATCGCCGGACTTTTGATGATTGTCTCATGGTCATCTTTTATTTATGCGCTAACCCATCATCTGGTACTTGATGCCAGCCTCGGCTATTTTATCAACCCGCTATTTGTTATTGCGCTGGGATGCATTTTTCTTAAAGAAAAACTGTCGTTGTTTCAGGCCATCGCCGTTTTTTCCGGTGTCTGCGGCCTGACTTTTCAAATTATCATGCTACGGCATTTCCCGGCGCTGGCGCTAACCATGGGATTATCATTTGCGCTATATGGCCTGGCGCGAAAGTTTATACATTATGATGTGATGACATCGATAACGATTGAAACATTATGGGCATTGCCTGTCTCACTATTAATTTTTCTCTTTAGCGATAGCGGACCAATTATATCCGCTAATACTCCTTTCTTTTTGTATGTCATGACGGCGCCAGTGACGATCATTCCACTGGTATTATTTGCCATCGCGTTAAATCACACCTCGTTGATTGTCACCGGACTGGCGCAATATATAGAGCCGTCGTTACAGTTCTTACTTGCCATTATGATATTTGGCGAACACATTAATTACGCAGAACTGCTCTGCTTTTGTGCGGTATGGTTCGGGTTGTTTCTATGCATATCTGAAAATTTATATTCCCATTATCTCCGCGCCCGTCTGAAACCGGTGTTCGGCAGGGTACAGCGCTTCTTTCGCTAA\n+>NZ_AOXE01000061.1_3|2390483285\n+ATGAATAAAATCCTGTTAGTTGATGATGACCGAGAGCTGACTTCCCTGTTAAAAGAGCTCCTCGAAATGGAAGGTTTTAATGTCCTGGTGGCCCACGACGGCGAGCAGGCGCTTGAGCTTTTGGATGACAGCATCGATTTACTTTTGCTTGACGTCATGATGCCGAAGAAAAACGGTATCGATACGTTGAAAGCGCTTCGCCAGACACACCAGACGCCTGTCATTATGCTGACCGCGCGCGGCAGCGAGCTGGATCGCGTTCTCGGCCTTGAGCTGGGCGCGGACGACTATTTACCCAAACCATTTAACGACCGCGAGCTGGTAGCGCGCATCAGGGCTATTTTGCGCCGTTCCCACTGGAGCGAACAGCAGCAGAGCAGCGACAACGGCTCGCCGACGCTGGAAGTCGATGCGCTAAGCCTTAATCCGGGTCGCCAGGAAGCCAGTTTCGATGGCCAAACGCTGGAGCTGACCGGTACGGAATTCACCCTGCTCTATTTGCTGGCCCAGCACCTCGGCCAGGTGGTTTCCCGTGAACATTTAAGTCAGGAAGTGCTGGGCAAGCGCCTGACGCCGTTCGATCGCGCCATTGATATGCATATTTCTAACCTGCGCCGCAAACTGCCGGAACGCAAAGACGGTCACCCGTGGTTTAAAACATTGCGTGGTCGCGGCTATCTGATGGTTTCCGCTTCATGA\n+>NZ_AOXE01000036.1_39|747567264\n+TTGAGCAAGAACGCATTTAATGCTTATTTTAATAGCCTGTGTTTAGGAGTAAGACCACGAAGTGATTATATAATGAGCAAAACAGAACTATACGCGGCGTTAAACCGCGATTTTCAGTCGTTAATGGCAGGTGAAACCAGCTTTCTGGCCACGCTGGCGAATACCAGCGCGCTACTGTTCGAACGCCTTACCGAGGTGAACTGGGCGGGATTTTATCTCCTCGAAGGCGATACGCTGGTGTTGGGGCCGTTTCAGGGGCGAATCGCCTGTGTGCGGATTCCGGTTGGTCGCGGCGTGTGTGGCGCAGCGGTAGCGCAGAATAAGGTTCAACGTATTGATGATGTTCATGCGTTTGACGGCCATATTGCCTGTGATGCCGCCAGCAACGCCGAAATTGTGCTGCCTGTCACGGTTGGCGAACGGATTATCGGCGTGCTGGATATAGATAGCACGGCGTTTGGCCGTTTTACCGAAGAAGATGAACACGGCCTGCGTACGCTGGTCGCACAGCTTGAAACCGTGCTTGCAACGACGGATTACAAAAAATTCTTTGCGAGCGTTGCAGGATAA\n+>NZ_AOXE01000031.1_80|1320460320\n+ATGAGTAAATGCAGTGCTGATGAAACCCCGGTTTGCTGCTGTATGGATGTTGGAACCATCATGGACAACTCCGATTGCACCGCGTCATACAGCCGCGTATTCGCTACCCGCGCAGAGGCTGAAGGGACGCTGGCGGCGTTAACCGAAAAAGCGCGTAGCGTGGAGTCTGAGCCTTGCCAAATTACGCCAACCTTTACCGAGGAATCCGAAGGCGTTCGTCTGGATATTGATTTTGTTTTCGCCTGCGAAGCAGAAACGCTGATCTTCCAGCTCGGCCTGCGTTAA\n+>NZ_AOXE01000059.1_395|2979506855\n+ATGATAGCGATTGAAACACGGCAATTAGCCGGAGGCGTCGTACTACATGCCTTCCCGGAAGGGAAACGCGCCGTGCCGCTTCCCTGTGTGGTGTTCTATCATGGCTTCACCTCCTCCAGCCTGGTTTATAGTTATTTTGCCGTTGCCCTGGCACAGGCAGGATTCCGGGTAGTCATGCCGGATGCGCCGGAGCATGGCGCTCGCTTCGGCGGCGACTCACAGGGACGGATACACCGATTCTGGCAGATTCTGCACCAAAACATGCAGGAATTTACGACGTTACGTGCGGCGATTCAGGAGGAAAACTGGCTACTTGACGGGCGGTTGGCGGTGGGCGGCGCATCCATGGGCGGTATGACGGCGCTGGGCATTATGACGCGTCACCGTGAGGTAAAATGCGGGGCCAGTTTAATGGGGTCGGGCTATTTTACCGGGCTTGCCCGGACGCTTTTCCCGCCGTTATCCCCGCAGAACCCGGCGCAGCAGGCGGAATTCGACAATATCATCGCGCCGCTACGTGAATGGGAAGTCACACACCAGTTGGAGCGACTGGCCGACAGGCCGCTTCTGTTGTGGCATGGTCAGGAGGATGACGTGGTGCCTGCTATCGAAACCTTCCGGCTCCAGCAGGCACTCGCCGGGGCGAAGCTGGATAAGCATGTGACCTGTTTATGGGCCGCAGGCGTGCGGCATCGCATTACGCCAGAAGCGTTGTCGGCGACGGTAGCGTTTTTCCGACAGCATCTTTAA\n+>NZ_AOXE01000081.1_105|294538384\n+ATGAGCGATATGCACTCGCTGCTGATAGCGGCAATTTTGGGTGTGGTCGAAGGATTGACGGAGTTTTTGCCGGTATCCAGCACGGGCCATATGATTATTGTGGGTCATCTGCTGGGGTTTGAAGGCGATACGGCCAAGACATTCGAAGTGGTGATTCAACTTGGATCTATTCTGGCGGTCGTGGTGATGTTCTGGCGGCGGTTGTTTGGTCTCATCGGTATTCACTTTGGCCGCCCGCTACAGCGTGAAGGTGAAAGTAAAGGTCGATTAACGTTGATTCACATCCTGCTGGGCATGATTCCGGCGGTGGTGCTGGGGTTAG'..b'CGTATGAGTATGCTGTTTCAGTCGGGAGCGCTGTTTACCGACATGAACGTGTTTGACAATGTGGCCTATCCGCTACGGGAGCACACCAATTTACCCGCGCCGCTGCTAAAAAGCGTCGTGATGATGAAACTGGAAGCTGTCGGGCTGCGCGGCGCGGCAAAACTGATGCCTTCCGAGCTCTCCGGTGGGATGGCGCGCCGCGCCGCGTTGGCGCGCGCCATCGCTCTGGAACCGGATCTCATCATGTTCGATGAGCCGTTTGTCGGCCAGGACCCGATTACCATGGGCGTTCTGGTGAAGCTGATTTCAGAATTGAACAGCGCGCTGGGCGTGACCTGCGTGGTGGTCTCGCATGATGTGCCAGAGGTGCTCAGTATTGCGGATCACGCCTGGATCATGGCGGACAAAAAAATCGTCGCTCACGGTAGCGCTCAGGCGTTGCAGGAGAATACGGACCCGCGCGTGCGTCAGTTCCTTGACGGTATTGCCGACGGGCCGGTTCCGTTCCGCTATCCGGCGGGCGACTATCACCTTGATTTACTCGAAACAGGGAGTTAA\n+>NZ_AOXE01000085.1_58|2087509483\n+AGCGACTGGGCTACCATGCAATTCGCCGCCGAAATTTTTGAAATTCTGGATGTCCCGCACCATGTAGAAGTGGTTTCCGCCCATCGCACCCCCGATAAACTGTTCAGCTTCGCCGAAACGGCGGAAGAGAACGGATATCAAGTGATTATTGCCGGCGCGGGCGGCGCGGCGCACCTGCCGGGAATGATTGCGGCAAAAACGCTGGTCCCGGTACTCGGCGTGCCGGTACAAAGCGCTGCGCTCAGCGGCGTGGATAGCCTCTACTCCATCGTGCAGATGCCGCGCGGCATTCCGGTGGGTACGCTGGCGATCGGTAAAGCCGGTGCCGCTAACGCCGCCCTGCTCGCCGCGCAGATTCTGGCGCAACACGACGCGGAACTGCATCAGCGCATCGCTGAC\n+>NZ_AOXE01000059.1_363|1932373744\n+ATGAGCTTACTCAACGTCCCGGCGGGTAAAGAACTGCCGGAAGATATCTACGTCGTTATCGAGATCCCGGCTAACGCAGATCCGATCAAATACGAAGTTGACAAAGAGAGCGGCGCGCTGTTCGTTGACCGCTTCATGTCCACCGCGATGTTCTATCCGTGCAACTACGGTTACATCAACCATACCCTGTCTCTGGACGGCGACCCGGTAGACGTTCTGGTCCCGACGCCGTACCCGCTGCAGCCAGGCGCCGTCATCCGTTGCCGTCCGGTTGGCGTACTGAAAATGACCGACGAATCCGGTGAAGATGCGAAACTGGTTGCCGTACCGCACACCAAACTGAGCAAAGAGTACGATCACATTAAAGATGTGAACGATCTGCCGGAACTGCTGAAAGCGCAGATCACTCATTTCTTCGAGCATTATAAAGATCTCGAAAAAGGCAAATGGGTGAAAGTTGACGGTTGGGACAACGCCGAAGCGGCTAAAGCGGAAATCGTTGCCTCCTTCGAGCGCGCAGCGAAGAAATAA\n+>NZ_AOXE01000072.1_60|1822361792\n+ATGAATAGGCAGCCATTACCCATTATCTGGCAAAGAATCATTTTTGATCCGTTATCGTATATCCATCCTCAGCGGTTGCAGATAGCGCCGGAAATGATTGTCAGACCCGCCGCCAGGGCGGCGGCAAATGAGTTAATACTGGCGGCATGGCGGCTTAAGAACGGAGAAAAGGAGTGTATTCAAAACTCACTGACGCAGCTGTGGCTGCGTCAGTGGCGCCGACTGCCGCAAGTAGCGTATTTACTCGGTTGCCATAAACTGAGAGCCGATCTGGCAAGGCAGGGAGCCTTGCTTGGCCTGCCGGATTGGGCGCAAGCATTTTTGGCAATGCATCAGGGAACAAGTTTATCTGTCTGCAATAAGGCGCCGAATCACCGGTTTTTACTTAGCGTCGGGTATGCACAGTTAAATGCCCTAAATGAATTTTTACCTGAATCTTTAGCACAGCGTTTTCCTTTGCTTTTTCCTCCATTTATTGAGGAGGCATTGAAGCAGGATGCTGTAGAAATGTCAATTTTGCTACTGGCCTTACAATATGCTCAAAAATATCCCAATACCGTCCCCGCTTTCGCCTGTTGA\n+>NZ_AOXE01000004.1_101|4181313675\n+ATGAAAAAGAAAAGACCCGTACTTCAGGATGTGGCCGACCGTGTCGGCGTGACCAAAATGACGGTCAGCCGTTTTTTGCGTAATCCGGAGCAGGTCTCCGTCGCGCTGCGGGGTAAAATTGCGGCTGCGCTTGATGAGCTCGGGTACATTCCTAATCGCGCGCCTGACATTCTTTCCAACGCCACCAGCCGCGCCATTGGCGTTCTGCTGCCGTCTTTAACCAACCAGGTCTTTGCGGAAGTGTTACGCGGCATTGAGGCCGTCACCGATGCCCACGGGTATCAGACCATGCTGGCGCACTACGGCTATAAACCGGAGATGGAGCAGGAGCGCCTGGAATCGATGCTCTCCTGGAATATCGACGGCCTGATCCTCACTGAGCGTACCCATACGCCGCGCACCTTAAAAATGATCGAAGTCGCCGGGATTCCGGTGGTGGAACTGATGGACAGCCAGTCGCCGTGTCTCGATATTGCCGTCGGTTTTGATAACTTCGAGGCCGCCCGTCAGATGACCGCCGCGATTATCGCGCGTGGTCATCGTCATATCGCCTATCTGGGGGCGCGCCTCGACGAACGTACTATCATCAAGCAGAAGGGCTATGAACAGGCGATGCGGGACGCCGGCCTGGTTCCTTACAGTGTGATGATGGAGCAATCTTCATCCTACTCTTCCGGTATCGAACTCATGCGCCAGGCGCGACGTGAATACCCACAGCTTGACGGTATTTTTTGCACCAACGATGACCTGGCGGTGGGGGCGGCCTTCGAATGCCAGCGCCTGGGGCTAAAAATCCCGGACGACATGGCGATCGCCGGGTTCCACGGTCATGACATCGGCCAGGTGATGGAACCGCGTCTGGCAAGCGTCCTGACGCCGCGCGAGCGAATGGGCAGCATTGGCGCGGAGCGTCTGTTGGCCCGCATTCGCGGCGAAACGGTCACGCCGAAAATGTTAGATTTAGGTTTCACCTTGTCACCGGGCGGATCTATTTAG\n+>NZ_AOXE01000053.1_217|2392764491\n+ATGAATACTATCTGGATTGCCGTTGGCGCCCTGACCCTTCTGGGCCTGGTGTTCGGCGCCATTCTGGGTTATGCCTCCCGCCGATTCGCGGTTGAAGATGATCCGGTTGTTGAAAAAATCGATGCGATTTTGCCGCAGAGCCAGTGTGGTCAGTGCGGCTATCCCGGCTGCCGCCCCTACGCCGAAGCCGTGGGTCTCCAGGGCGAGAAAATCAATCGTTGCGCGCCCGGCGGCGAAGCCGTCATGCTGAAAATTGCAGAGTTGCTAAACGTAGAGCCGCAGCCATGCGATGGTGAAGAACAGCAGGCCGCGCCAGTACGTATGCTGGCAGTTATTGATGAAAATAACTGCATTGGCTGCACAAAATGCATTCAGGCCTGCCCGGTTGACGCCATCGTTGGCGCAACGCGCGCCATGCACACGGTCATGAGCGATCTCTGTACTGGCTGTAATCTGTGCGTCGATCCGTGTCCGACGCACTGCATCGAATTACGTCCGGTGAATGAGACGCCCGACAGTTGGAAATGGGATTTGAACACCATTCCCGTTCGCATCATTCCCGTGGAACAACATGCTTAA\n+>NZ_AOYX01000060.1_42|486088087\n+ATGCGTAAGCGCAAAGAAAAAGCAGTAAAGGTGCGGCAGTATGTAAATAGTAATGAGAACGACTATCAATTCGACGTTGTTTTGATATTATTATGCTCAGATTTTGTGATTTGCGTCCTGGAGATACAGAGTGGGTAA\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/novel-alleles.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/novel-alleles.fasta Wed Mar 01 12:35:39 2017 -0500
b
b'@@ -0,0 +1,118 @@\n+>NZ_AOXE01000031.1_80|2955003506\n+ATGAGTAAATGCAGTGCTGATGAAACCCCGGTTTGCTGCTGTATGGACGTTGGAACCATCATGGACAACTCCGATTGCACCGCGTCATACAGCCGCGTATTCGCTACCCGCGCAGAGGCTGAAGAGACGCTGGCGGCGTTAACCGAAAAAGCGCGTAGCGTGGAGTCTGAACCTTGCCAAATTACGCCAACCTTTACCGAGGAATCCGAAGGCGTTCGTCTGGATATTGATTTTGTTTTCGCCTGCGAAGCAGAAACGCTGATCTTCCAGCTCGGCCTGCGTTAA\n+>NZ_AOXE01000083.1_53|2640609716\n+ATGATTGACTGGCAGGATCTCCACCATAGCGAACTTACTGTTCCTCAACTTTACGCGTTACTCAAATTGCGCTGCGCGGTCTTTGTGGTAGAACAGCGCTGTCCTTACCTGGATGTGGATGGCGATGATTTAGTCGGCGACAATCGGCATATTCTGGGCTGGCATCAGGATGAGCTGGTGGCGTATGCGAGGATTCTGAAAAGTGATAACGAATCAGACCCGGTTGTCATTGGCCGGGTGATCGTCAGCGATGCCTGGCGAGGGGCGAAACTGGGCCAGCAATTAATGGCGAAAACGCTGGAATCTTGTGGGCGGCACTGGCCGGATAAGCCGCTATATCTGGGGGCGCAGGCGCATCTGCAACCTTTTTATGCGCGGTTTGGTTTTATTCCCGTCACCGATGTTTATGACGAAGACGGCATTCCGCACCGGGGAATGGCGCGGGAGGTTCATCAGGCATAA\n+>NZ_AOXE01000059.1_129|278162969\n+ATGGGTAAAATTATTGGTATCGACCTGGGTACTACCAACTCTTGTGTAGCGATTATGGATGGAACGCAGGCACGCGTGCTGGAGAACGCCGAGGGCGATCGCACTACGCCTTCTATCATTGCTTATACCCAGGATGGTGAAACTCTGGTTGGTCAGCCGGCTAAACGTCAGGCAGTGACAAACCCGCAAAACACCCTGTTTGCGATTAAACGCCTGATTGGCCGCCGCTTCCAGGACGAAGAAGTTCAACGTGACGTTTCTATCATGCCGTACAAAATCATCGGCGCCGACAACGGCGACGCATGGCTTGATGTGAAAGGTCAGAAAATGGCGCCGCCGCAGATTTCTGCCGAAGTGCTGAAGAAAATGAAGAAAACGGCTGAAGATTATCTGGGCGAACCGGTAACTGAAGCGGTTATCACCGTACCGGCTTACTTTAACGATGCGCAGCGTCAGGCTACCAAAGATGCTGGTCGTATCGCGGGGCTGGAAGTTAAACGTATCATCAACGAACCGACTGCCGCAGCGCTGGCTTACGGTCTGGATAAAGAAGTCGGCAACCGTACTATCGCGGTTTACGACCTCGGTGGTGGTACTTTCGATATCTCTATTATCGAAATCGACGAAGTTGATGGCGAAAAAACCTTTGAAGTTCTGGCAACCAACGGTGATACCCACCTGGGTGGTGAAGACTTCGATACCCGCCTGATCAACTACCTCGTTGACGAGTTTAAGAAAGATCAGGGCATCGACCTGCGTAACGATCCGCTGGCCATGCAGCGCCTGAAAGAAGCCGCAGAAAAAGCCAAAATCGAGCTGTCTTCTGCGCAGCAGACCGACGTGAACCTGCCGTACATTACCGCAGATGCCACCGGTCCGAAACACATGAACATCAAAGTGACCCGTGCGAAACTGGAAAGCCTGGTTGAAGATCTGGTGAACCGTTCTATCGAGCCGCTGAAAGTCGCACTGCAGGACGCTGGCCTGTCCGTGTCTGATATCAACGACGTGATCCTCGTCGGCGGTCAGACCCGTATGCCAATGGTGCAGAAAAAAGTGGCTGAGTTCTTCGGTAAAGAGCCGCGTAAAGACGTTAACCCGGACGAAGCTGTGGCTATCGGCGCAGCGGTACAGGGCGGCGTATTGACCGGTGATGTGAAAGACGTACTGCTGCTGGACGTTACCCCGCTGTCTCTGGGTATCGAAACGATGGGTGGCGTGATGACTCCGCTTATCACCAAAAACACCACCATCCCGACCAAGCACAGCCAGGTGTTCTCTACTGCGGAAGACAACCAGTCTGCGGTAACCATCCATGTGCTGCAGGGTGAGCGTAAACGTGCGTCTGATAACAAATCTCTGGGTCAGTTCAACCTGGATGGCATCAACCCGGCGCCGCGCGGTATGCCGCAGATCGAAGTCACCTTCGATATCGATGCTGACGGTATCCTGCACGTCTCCGCGAAAGATAAAAATAGCGGTAAAGAGCAGAAGATCACTATCAAGGCGTCTTCTGGTCTGAACGAGGAAGAAATTCAGAAAATGGTTCGCGATGCAGAAGCGAACGCTGAATCCGACCGTAAGTTCGAAGAGCTGGTTCAGACCCGTAACCAGGGTGACCATCTGCTGCACAGCACCCGTAAGCAGGTTGAAGAAGCAGGCGATAAACTGCCGGCTGATGACAAAACCGCTATCGAGTCTGCGCTGAGCGCGCTGGAAACTGCCCTGAAAGGCGAAGATAAAGCCGCTATCGAAGCGAAAATGCAGGAGCTGGCGCAGGTTTCCCAGAAACTGATGGAAATCGCTCAGCAGCAACATGCGCAGCAGCAGGCTGGCTCCGCCGACGCTTCTGCAAACAATGCGAAAGATGACGACGTTGTCGACGCTGAGTTTGAAGAAGTAAAAGATAAAAAATAA\n+>NZ_AOXE01000059.1_9|2978539204\n+ATGGAAAAACAACGCGGTTTCACGCTTATCGAACTGATGGTCGTTATTGGCATCATCGCCATTTTAAGCGCCATTGGCATTCCGGCTTACCAGAACTATCTGCGTAAAGCGGCGCTGACGGATATGTTGCAAACATTTGTCCCCTACCGTACTGCCGTCGAACTCTGCGCTCTGGAACATGGTGGGACGAGCACATGCGATGCGGGCGTCAACGGTATCCCCTCGCCCGTCATCACCCGTTATGTTTCGGGCATGAGCGTGGAAAAAGGCGTCATCACGCTTACCGGTCAGGAGAGTCTGAGCGGGCTTAGCGTCATCATGACGCCCGCCTGGGACAATGCTAACGGCATTACCGGCTGGACGCGTAACTGCAATATTCAAAGCGACAGCGCGTTACAACAGGCTTGTGAAGATGTCTTCCGTTTTGACGCCAACTAA\n+>NZ_AOXE01000033.1_51|3293595301\n+ATGCAGGAAAACCACATTCATTCCGATACCACCTTTGCGCTGCGAAGCGTCGCTTTTCGTGTGCCGGGCCGCACGCTTTTACACCCCCTCTCGTTAACGTTTCCCGCAGGTCGAGTCACCGGACTTATCGGCCATAATGGTTCCGGTAAATCCACGCTGTTAAAAATGCTGGGCCGCCATCAGCTGCCTTCCGAAGGGGATATTCTGCTCGACAATCAGCCGCTGGCGAGCTGGAGCAGCAAGGCGTTTGCCCGCAAAGTTGCCTATCTGCCTCAACAATTGCCGCAGGCGGAAGGAATGACGGTGCGCGAACTGGTGGCGATTGGCCGCTATCCGTGGCACGGCGCGTTGGGACGCTTTGGCGTCGCGGACCGGGAAAAAGTAGACGAGGCGATTACGCTGGTCGGCTTAAAACCGCTGGCGCATCGTCTGGTCGATAGCCTGTCCGGCGGTGAACGCCAGCGCGCGTGGATTGCCATGCTGGTCGCGCAGGACAGCCGTTGTCTGCTGCTGGATGAGCCGACGTCAGCGCTGGATATCGCCCATCAGGTTGACGTGCTGGCGCTGGTGCATCGTTTAAGCCAACAGCGCGGGCTGACGGTGGTTGCGGTGCTGCACGATATCAACATGGCGGCCCGCTACTGTGATTATTTAGTAGCGCTACGCGGCGGTGAAATGATTGCGCAAGGAACGCCTGCGGA'..b'GCGATATGGCCGAGCTGGGCGCAGAAAGCGAAGCGTGCCACCTCCAGGTTGGCGAGGCCGCAAAAGCGGCGGGTATTGATCGCGTGTTAAGCACCGGAAAACTCAGTCAGGTTATCAGCCACGCCAGCGGCGTCGGCGAACATTTTGCCGATAAAGCCGCGCTCATTGCGCGTTTACACGCTCTGCTTCAGGAGCAACCGATGATGACTATTTTAGTGAAAGGATCACGCAGCGCCGCGATGGAAGATGTGGTTCATGCGTTACAGGAGAAAGGTTCATGTTAG\n+>NZ_AOXE01000059.1_32|3569491948\n+GTGGCAGATCGTAATTTGCGCGACCTTCTTGCTCCGTGGGTGGCTGGACTACCCGCGCGAGAACTGCGAGAGATGACGCTCGACAGCCGTGTGGCTGCGGCGGGCGATCTCTTTGTGGCAGTGGTGGGTCATCAGGCGGACGGGCGTCGTTATATCCCGCAGGCGATAGCGCAAGGCGTAGCTGCCATTATTGCAGAGGCGAAAGACGAGGCGAGTGACGGTGAGATTCGCGAAATGCACGGCGTACCAGTCGTCTATCTCAGCCAGCTCAATGAGCGTTTATCTGCGCTGGCGGGCCGCTTTTACCATGAGCCATCTGAAAACATGCGTCTGGTGGCGGTGACCGGCACCAATGGCAAGACCACCACCACCCAACTACTGGCGCAGTGGAGCCAGTTGCTCGGCGAAACCAGCGCAGTGATGGGGACGGTAGGTAACGGACTGTTGGGTAAAGTGATCCCGACGGAGAACACAACCGGTTCCGCAGTGGATGTTCAGCATGTGCTGGCCAGTCTGGTTGCGCAGGGCGCGACCTTCGGCGCTATGGAAGTCTCTTCTCACGGCCTGGTGCAGCATCGCGTGGCGGCGCTGAAATTTGCCGCCTCCGTGTTTACTAATTTGAGCCGCGACCATCTCGACTATCATGGCGATATGGCGCATTACGAGGCGGCAAAATGGATGCTTTATTCCACCCACCATCACGGTCAGGCAATCGTCAACGCCGATGATGAAGTTGGACGCCGCTGGCTGGCGTCGCTACCCGATGCGGTCGCGGTTTCAATGGAAGGGCATATCAACCCTAACTGTCACGGTCGTTGGCTGAAAGCGGAGGCGGTGGAGTACCACGACCGCGGAGTGACGATTCGTTTTGCTTCAAGCTGGGGTGAAGGCGAAATCGAAAGCTGCCTGATGGGCGCGTTTAACGTCAGCAACTTACTGCTGGCATTGGCGACGCTGCTGGCGCTGGGCTATCCGTTAACGGATTTGCTGAAAACCGCCGCGCGTTTGCAGCCGGTTTGCGGGCGTATGGAAGTGTTCACTGCGCCAGGCAAACCGACGGTGGTGGTGGATTACGCGCACACGCCGGATGCGCTGGAAAAAGCATTGCAGGCGGCGCGCCTGCACTGCGCCGGAAAATTGTGGTGCGTCTTTGGCTGTGGCGGGGATCGTGACAAAGGTAAGCGCCCACTCATGGGGGCCATTGCCGAAGAATTCGCGGATATCGTCGTGGTGACTGACGATAACCCGCGTACCGAGGAGCCGCGCGCCATTATCAACGATATTCTGGCCGGAATGCTGGACGCCGGGCAGGTCAGGGTAATGGAAGGCCGCGCTGAGGCGGTAACCAATGCCATTATGCAGGCAAAAGACAATGACGTCGTGCTGATTGCAGGTAAAGGGCACGAGGATTACCAGATTGTCGGCACGCAGCGTCTTGATTATTCAGACCGCGTGACCGCAGCGCGTTTGCTGGGGGTGATCGCATGA\n+>NZ_AOXE01000059.1_35|1052128508\n+ATGATCAGCAGAGTGACAGAAGCCCTAAGCAAAGTTAAGGGATCGATAGGAAGCAACGAGCGCCATGCCTTGCCTGGCGTGATCGGTGACGATCTTTTGCGGTTCGGGAAGCTGCCACTCTGCTTGTTCATTTGCATCATTTTAACGGCGGTGACGGTGGTCACGACGGCGCACCATACTCGTTTACTCACCGCTCAGCGTGAACAACTGGTTCTGGAGCGCGATGCATTGGACATTGAATGGCGCAACCTGATCCTTGAAGAAAATGCGCTCGGCGATCACAGCCGGGTAGAGCGGATCGCAACGGAAAAGCTGCAAATGCAGCATGTTGATCCGTCCCAAGAAAATATCGTAGTGCAAAAATAA\n+>NZ_AOXE01000059.1_37|1510445340\n+ATGTTCCGGGGGGCAACGTTAGTCAATCTCGACAGTAAAGGGCGCCTGACCGTGCCGACCCGTTATCGGGAGCAACTGATCGAGAGCGCTACCGGTCAAATAGTATGTACCATTGACATCCATCACCCATGCCTGCTGCTTTACCCCCTGCCTGAATGGGAAATTATTGAGCAAAAGTTATCTCGTCTGTCGAGCATGAACCCGGTAGAACGTCGCGTACAGCGTTTACTGTTGGGCCATGCCAGTGAATGTCAGATGGATGGTGCAGGTCGATTACTGATCGCGCCAGTTCTGCGGCAACATGCCGGACTGACGAAAGAAGTGATGCTGGTTGGACAGTTCAACAAATTTGAGCTGTGGGATGAAACGACCTGGTATCAACAGGTCAAGGAAGATATCGACGCTGAACAGTCAGCTACCGAAACGTTATCGGAGCGGCTGCAGGACTTGTCTCTATAA\n+>NZ_AOXE01000083.1_47|2372254687\n+ATGCGTAGCGCGCAGGTATACCGCTGGCAGATCCCCATGGACGCGGGGGTGGTTCTGCGCGACAGGCGGTTAAAAACTCGCGACGGGCTGTATGTTTGTCTGCGTGACGGCGAGCGTGAAGGGTGGGGAGAGATCTCCCCGCTGCCGGGCTTCAGTCAGGAAACGTGGGAAGAGGCGCAGACGGCGCTCCTGACATGGGTGAATGACTGGCTTCAGGGGAGCGAGGGATTACCGGAGATGCCTTCGGTCGCGTTTGGCGCAAGCTGCGCGCTGGCGGAACTGACTGGCGTCTTGCCGGAGGCGGCGGACTATCGCGCCGCGCCGTTATGCACTGGCGATCCTGACGATTTGGTACTGCGGCTTGCCGATATGCCCGGCGAGAAAATCGCTAAGGTCAAAGTGGGTCTCTATGAAGCGGTACGCGACGGCATGGTGGTTAATTTGCTGCTGGAGGCGATCCCGGATCTGCATCTGCGTCTGGATGCGAATCGCGCCTGGACGCCGCTAAAAGCCCAACAGTTCGCAAAGTATGTTAATCCGGATTACCGCGCTCGTATCGCCTTTCTCGAAGAACCGTGTAAGACGCGGGATGATTCCCGCGCCTTTGCCCGTGAAACCGGCATCGCGATTGCCTGGGACGAAAGTCTGCGCGAAGCGGATTTCACCTTTGAAGCCGAAGAGGGCGTCAGGGCTGTGGTTATCAAACCTACGCTGACCGGATCGCTTGATAAAGTGCGTGAGCAAGTCGCTGCCGCCCATGCGTTGGGACTGACGGCGGTCATCAGCTCTTCGATCGAGTCCAGCCTCGGCCTGACGCAACTGGCGCGGATTGCCGCCTGGTTGACGCCGGGAACGCTGCCCGGACTGGATACCTTGCATCTGATGCAGGCGCAACAGGTTCGCCCCTGGCCTGGTAACGCGTTGCCTTGTCTGAAGCGTGATGAGCTGGAACGACTGTTATGA\n+>NZ_AOXE01000083.1_45|1328452594\n+ATGGAATGGTTGGTTAAGAAATCGCATTATGTCAAAAAGAGGGCGTGCCATGTTCTGGTGCTGTGCGATAGCGGCGGTTCGCTAAAAATGATCGCCGAGGCGAATTCCATGATATTACTGAGTCCCGGCGATATCCTGTCGCCTTTACAGGATGCGCAGTATTGTATTAATCGGGAAAAACACCAGACCTTAAAAATCGTTGATGCACGCTGTTATTCCTGCGACGAATGGCAGCGGTTGACGCGCAAGCCATTATGA\n'
b
diff -r 000000000000 -r ebee10be4297 test-data/sistr-results-13-1101.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sistr-results-13-1101.tab Wed Mar 01 12:35:39 2017 -0500
[
@@ -0,0 +1,2 @@
+cgmlst_ST cgmlst_distance cgmlst_genome_match cgmlst_matching_alleles cgmlst_subspecies fasta_filepath genome h1 h2 mash_distance mash_genome mash_match mash_serovar mash_subspecies o_antigen qc_messages qc_status serogroup serovar serovar_antigen serovar_cgmlst
+2375035975 0.069696969697 SRR1097806 307 enterica /mnt/ssd240/work/repos/galaxy_tools/tools/sistr_cmd/test-data/13-1101-Paratyphi_B.fasta 13-1101-Paratyphi_B b 1,2 0.00178577 72308 929 Paratyphi B var. Java enterica 1,4,[5],12 PASS B Paratyphi B var. Java Paratyphi B|Paratyphi B var. Java Paratyphi B var. Java
b
diff -r 000000000000 -r ebee10be4297 test-data/sistr-results.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sistr-results.tab Wed Mar 01 12:35:39 2017 -0500
b
@@ -0,0 +1,2 @@
+cgmlst_distance cgmlst_genome_match cgmlst_matching_alleles cgmlst_subspecies fasta_filepath genome h1 h2 mash_distance mash_genome mash_match mash_serovar o_antigen qc_messages qc_status serogroup serovar serovar_antigen serovar_cgmlst
+0.827272727273 73530 57 enterica /mnt/ssd240/work/repos/galaxy_tools/tools/sistr_cmd/test-data/AE014613-699860.fasta AE014613-699860 - - 0.0637124 60369 151 Typhi - FAIL: Large number of cgMLST330 loci missing (n=272 > 30) | FAIL: Wzx/Wzy genes missing. Cannot determine O-antigen group/serogroup. Cannot accurately predict serovar from antigen genes. | WARNING: H1 antigen gene (fliC) missing. Cannot determine H1 antigen. Cannot accurately predict serovar from antigen genes. | WARNING: Input genome size (699860 bp) not within expected range of 4000000-6000000 (bp) for Salmonella | WARNING: Only matched 57 cgMLST330 loci. Min threshold for confident serovar prediction from cgMLST is 297.0 FAIL - -:-:- -:-:- Typhi