Repository 'plant_tribes_gene_family_aligner'
hg clone https://toolshed.g2.bx.psu.edu/repos/greg/plant_tribes_gene_family_aligner

Changeset 6:3384b6a842b0 (2017-10-30)
Previous changeset 5:a73c2e65098e (2017-08-25) Next changeset 7:2ac7090847f9 (2017-12-21)
Commit message:
Uploaded
modified:
.shed.yml
gene_family_aligner.xml
macros.xml
utils.py
added:
test-data/3722.faa
test-data/3722.faa.aln
test-data/3722.fna
test-data/3722.fna.aln
test-data/38889.faa
test-data/38889.faa.aln
test-data/38889.fna
test-data/38889.fna.aln
test-data/39614.faa
test-data/39614.faa.aln
test-data/39614.fna
test-data/39614.fna.aln
removed:
gene_family_aligner.py
b
diff -r a73c2e65098e -r 3384b6a842b0 .shed.yml
--- a/.shed.yml Fri Aug 25 13:01:25 2017 -0400
+++ b/.shed.yml Mon Oct 30 09:52:00 2017 -0400
b
@@ -7,7 +7,7 @@
   Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that
   utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
   comparative evolutionary studies.  This tool aligns gene family sequences.
-remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_aligner
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_aligner
 type: unrestricted
 categories:
 - Phylogenetics
b
diff -r a73c2e65098e -r 3384b6a842b0 gene_family_aligner.py
--- a/gene_family_aligner.py Fri Aug 25 13:01:25 2017 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-import argparse
-import os
-
-import utils
-
-OUTPUT_DIR = 'geneFamilyAlignments_dir'
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--alignment_method', dest='alignment_method', help='Multiple sequence alignments method')
-parser.add_argument('--automated_trimming', dest='automated_trimming', default=None, help='Trims alignments using trimAls ML heuristic trimming approach')
-parser.add_argument('--codon_alignments', dest='codon_alignments', default=None, help='Flag for constructing orthogroup multiple codon alignments')
-parser.add_argument('--gap_trimming', dest='gap_trimming', default=0, type=float, help='Remove sites in alignments with gaps of')
-parser.add_argument('--iterative_realignment', dest='iterative_realignment', type=int, default=0, help='Maximum number of iterations')
-parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of threads to use for execution')
-parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help='Directory of input fasta datasets')
-parser.add_argument('--output', dest='output', help='Output dataset')
-parser.add_argument('--output_dir', dest='output_dir', help='Output dataset files_path directory')
-parser.add_argument('--output_dataset_collection', dest='output_dataset_collection', default=None, help='Output additional dataset collection of files.')
-parser.add_argument('--pasta_iter_limit', dest='pasta_iter_limit', type=int, default=None, help='Maximum number of iteration that the PASTA algorithm will execute')
-parser.add_argument('--pasta_script_path', dest='pasta_script_path', default=None, help='Path to script for executing pasta')
-parser.add_argument('--remove_sequences', dest='remove_sequences', default=0, type=float, help='Remove sequences with gaps of')
-
-args = parser.parse_args()
-
-# Build the command line.
-cmd = 'GeneFamilyAligner'
-cmd += ' --orthogroup_faa %s' % args.orthogroup_faa
-cmd += ' --alignment_method %s' % args.alignment_method
-if args.alignment_method == 'pasta':
-    if args.pasta_script_path is not None:
-        cmd += ' --pasta_script_path %s' % args.pasta_script_path
-    if args.pasta_iter_limit is not None:
-        cmd += ' --pasta_iter_limit %d' % args.pasta_iter_limit
-cmd += ' --num_threads %d' % args.num_threads
-if args.codon_alignments is not None:
-    cmd += ' --codon_alignments'
-if args.automated_trimming is not None:
-    cmd += ' --automated_trimming'
-if args.gap_trimming > 0:
-    cmd += ' --gap_trimming %4f' % args.gap_trimming
-if args.remove_sequences > 0:
-    cmd += ' --remove_sequences %4f' % args.remove_sequences
-if args.iterative_realignment > 0:
-    cmd += ' --iterative_realignment %d' % args.iterative_realignment
-
-# Run the command.
-utils.run_command(cmd)
-
-# Handle outputs.
-if args.codon_alignments is None:
-    src_output_dir = OUTPUT_DIR
-else:
-    src_output_dir = os.path.join(OUTPUT_DIR, 'orthogroups_aln')
-if args.output_dataset_collection is not None:
-    utils.move_directory_files(src_output_dir, args.output_dataset_collection, copy=True)
-utils.move_directory_files(src_output_dir, args.output_dir)
-utils.write_html_output(args.output, 'Aligned gene family sequences', args.output_dir)
b
diff -r a73c2e65098e -r 3384b6a842b0 gene_family_aligner.xml
--- a/gene_family_aligner.xml Fri Aug 25 13:01:25 2017 -0400
+++ b/gene_family_aligner.xml Mon Oct 30 09:52:00 2017 -0400
[
b'@@ -1,36 +1,30 @@\n-<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.2">\n+<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.3.0">\n     <description>aligns integrated orthologous gene family clusters</description>\n     <macros>\n         <import>macros.xml</import>\n     </macros>\n-    <expand macro="requirements_gene_family_aligner" />\n+    <requirements>\n+        <requirement type="package" version="1.0.3">plant_tribes_gene_family_aligner</requirement>\n+    </requirements>\n     <command detect_errors="exit_code"><![CDATA[\n-#set input_format = $input_format_cond.input_format\n-#set alignment_method_cond = $input_format_cond.alignment_method_cond\n #set alignment_method = $alignment_method_cond.alignment_method\n-#if str($input_format_cond.input_format) == \'ptortho\':\n-    #set output_codon_alignments = False\n-#else if str($input_format_cond.input_format) == \'ptorthocs\' and str($input_format_cond.codon_alignments ) == \'no\':\n-    #set output_codon_alignments = False\n-#else:\n-    #set output_codon_alignments = True\n-#end if\n-\n-python \'$__tool_directory__/gene_family_aligner.py\'\n+#set input_dir = \'input_dir\'\n+mkdir $input_dir &&\n+#for $i in $input:\n+    #set filename = $i.file_name\n+    #set name = $i.name\n+    ln -s $filename $input_dir/$name &&\n+#end for\n+GeneFamilyAligner\n --alignment_method $alignment_method\n #if str($alignment_method) == \'pasta\':\n     --pasta_script_path \'$__tool_directory__/run_pasta.py\'\n     --pasta_iter_limit $alignment_method_cond.pasta_iter_limit\n #end if\n --num_threads \\${GALAXY_SLOTS:-4}\n-#if str($input_format) == \'ptortho\':\n-    --orthogroup_faa \'$input_format_cond.input_ptortho.extra_files_path\'\n-#else:\n-    ## str($input_format) == \'ptorthocs\'\n-    --orthogroup_faa \'$input_format_cond.input_ptorthocs.extra_files_path\'\n-    #if str($input_format_cond.codon_alignments) == \'yes\':\n-      
  --codon_alignments true\n-    #end if\n+--orthogroup_faa \'$input_dir\'\n+#if str($codon_alignments) == \'yes\':\n+    --codon_alignments\n #end if\n #set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences\n #if str($remove_gappy_sequences) == \'yes\':\n@@ -40,7 +34,7 @@\n         --gap_trimming $trim_type_cond.gap_trimming\n     #else:\n         ## str($trim_type) == \'automated_trimming\'\n-        --automated_trimming true\n+        --automated_trimming\n     #end if\n     #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond\n     #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps\n@@ -51,90 +45,116 @@\n         #if str($remove_sequences_with_gaps_cond.iterative_realignment):\n             --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment\n         #end if\n-        #if $output_codon_alignments:\n-            --output \'$output_aln_filtered_ca\'\n-            --output_dir \'$output_aln_filtered_ca.files_path\'\n-        #else:\n-            --output \'$output_aln_filtered\'\n-            --output_dir \'$output_aln_filtered.files_path\'\n-        #end if\n-    #else:\n-        #if $output_codon_alignments:\n-            --output \'$output_aln_trimmed_ca\'\n-            --output_dir \'$output_aln_trimmed_ca.files_path\'\n-        #else:\n-            --output \'$output_aln_trimmed\'\n-            --output_dir \'$output_aln_trimmed.files_path\'\n-        #end if\n-    #end if\n-#else:\n-    #if $output_codon_alignments:\n-        --output \'$output_aln_ca\'\n-        --output_dir \'$output_aln_ca.files_path\'\n-    #else:\n-        --output \'$output_aln\'\n-        --output_dir \'$output_aln.files_path\'\n     #end if\n #end if\n-#if str($output_dataset_collection) == \'yes\':\n-    --output_dataset_collection dataset_collection\n-#end if\n+&>proc.log\n     ]]></command>\n     <inputs>\n-        <conditional 
name="input_format_cond">\n-            <param name="input_format" type="select" label="Classified orthogroup fasta files">\n-                <option value="ptortho">Prot'..b'y_sequences_cond[\'remove_sequences_with_gaps_cond\'][\'remove_sequences_with_gaps\'] == \'yes\'</filter>\n+        </collection>\n+\n     </outputs>\n     <tests>\n-        <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.\n         <test>\n+            <param name="input">\n+                <collection type="list">\n+                    <element name="3722.faa" value="3722.faa"/>\n+                    <element name="3722.fna" value="3722.fna"/>\n+                    <element name="38889.faa" value="38889.faa"/>\n+                    <element name="38889.fna" value="38889.fna"/>\n+                    <element name="39614.faa" value="39614.faa"/>\n+                    <element name="39614.fna" value="39614.fna"/>\n+                </collection>\n+            </param>\n+            <param name="codon_alignments" value="yes"/>\n+            <output_collection name="primary_faa" type="list">\n+                <element name="3722.faa.aln" file="3722.faa.aln" ftype="fasta"/>\n+                <element name="38889.faa.aln" file="38889.faa.aln" ftype="fasta"/>\n+                <element name="39614.faa.aln" file="39614.faa.aln" ftype="fasta"/>\n+            </output_collection>\n+            <output_collection name="primary_fna" type="list">\n+                <element name="3722.fna.aln" file="3722.fna.aln" ftype="fasta"/>\n+                <element name="38889.fna.aln" file="38889.fna.aln" ftype="fasta"/>\n+                <element name="39614.fna.aln" file="39614.fna.aln" ftype="fasta"/>\n+            </output_collection>\n         </test>\n-        -->\n     </tests>\n     <help>\n This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary\n@@ -145,13 +165,13 @@\n \n 
**Required options**\n \n- * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history.  Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.\n+ * **Integrated orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyIntegrator tool selected from your history.  Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.\n \n  * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments.  PlantTribes estimates alignments using either MAFFT\'s L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.\n \n   - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.\n \n- * **Codon alignments** - select \'Yes\' to create codon multiple sequence alignments.  This option requires protein and their corresponding coding sequences to be provided as input data.\n+ * **Codon alignments** - select \'Yes\' to create codon multiple sequence alignments.  This option requires both protein and their corresponding coding sequence orthogroup fasta files to be present in the GeneFamilyAligner input data that was produced by the GeneFamilyIntegrator.\n \n **Other options**\n \n@@ -167,7 +187,7 @@\n \n     - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences.  
Zero value has no effect.\n \n- * **Output additional dataset collection of files** - selecting \'Yes\' will produce an additional output dataset collection whose elements are copies of the directories of files (these elements can be viewed with visualization tools).\n+ * **Output primary and intermediate alignments** - selecting \'Yes\' will produce a dataset collection of primary and intermediate alignments, the elements of which can be viewed with visualization tools, in addition to the final trimmed and/or filtered alignments dataset collection.\n \n .. _trimAl: http://trimal.cgenomics.org\n \n'
b
diff -r a73c2e65098e -r 3384b6a842b0 macros.xml
--- a/macros.xml Fri Aug 25 13:01:25 2017 -0400
+++ b/macros.xml Mon Oct 30 09:52:00 2017 -0400
b
@@ -1,47 +1,6 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <macros>
     <token name="@WRAPPER_VERSION@">1.0</token>
-    <xml name="requirements_assembly_post_processor">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_assembly_post_processor</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_aligner">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_aligner</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_classifier">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_classifier</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_integrator">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_integrator</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_kaks_analysis">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_kaks_analysis</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_ks_distribution">
-        <requirements>
-            <requirement type="package" version="1.3.2">r-optparse</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_phylogeny_builder">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_phylogeny_builder</requirement>
-        </requirements>
-    </xml>
-    <xml name="param_codon_alignments">
-        <param name="codon_alignments" type="select" label="Codon alignments">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_method">
         <param name="method" type="select" label="Protein clustering method">
             <option value="gfam" selected="true">GFam</option>
@@ -49,74 +8,12 @@
             <option value="orthomcl">OrthoMCL</option>
         </param>
     </xml>
-    <xml name="param_options_type">
-        <param name="options_type" type="select" label="Options Configuration">
-            <option value="basic" selected="true">Basic</option>
-            <option value="advanced">Advanced</option>
-        </param>
-    </xml>
-    <xml name="param_orthogroup_fna">
-        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_scaffold">
         <param name="scaffold" type="select" label="Gene family scaffold">
             <options from_data_table="plant_tribes_scaffolds" />
             <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
         </param>
     </xml>
-    <xml name="param_sequence_type">
-        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
-            <option value="protein" selected="true">Amino acid based</option>
-            <option value="dna">Nucleotide based</option>
-        </param>
-    </xml>
-    <xml name="cond_alignment_method">
-        <conditional name="alignment_method_cond">
-            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
-                <option value="mafft" selected="true">MAFFT</option>
-                <option value="pasta">PASTA</option>
-            </param>
-            <when value="mafft" />
-            <when value="pasta">
-                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
-            </when>
-        </conditional>
-    </xml>
-    <xml name="cond_remove_gappy_sequences">
-        <conditional name="remove_gappy_sequences_cond">
-            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
-                <option value="no" selected="true">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="no" />
-            <when value="yes">
-                <conditional name="trim_type_cond">
-                    <param name="trim_type" type="select" label="Trimming method">
-                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
-                        <option value="automated_trimming">Automated heuristic trimming</option>
-                    </param>
-                    <when value="gap_trimming">
-                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
-                    </when>
-                    <when value="automated_trimming" />
-                </conditional>
-                <conditional name="remove_sequences_with_gaps_cond">
-                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="no" />
-                    <when value="yes">
-                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
-                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
-                    </when>
-                </conditional>
-            </when>
-        </conditional>
-    </xml>
     <xml name="citation1">
         <citation type="bibtex">
             @misc{None,
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/3722.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.faa Mon Oct 30 09:52:00 2017 -0400
b
b'@@ -0,0 +1,161 @@\n+>gnl_Glyma1.01_PACid_16266208\n+MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG\n+VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK\n+RNSSEMNFGKAENSSVFDASYQNFCFGVNQLQDIKKGKGGILGGGGRSRHRSGRKQKMFYGHDV\n+>gnl_Glyma1.01_PACid_16266209\n+MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG\n+VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK\n+RNSSEMNFGKAENSSVFDASYQNFCFGTGEPTPRYKEGKGGNSRRRR\n+>gnl_Glyma1.01_PACid_16266210\n+MANEGKKSNNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATGNLELVEEAKKKFQEIREAYSVLSDANKRLMYDVG\n+VYDSDDDENGMGDFLDEMLTMMSHTKSNENGEESFEELQQLFEDMFQADIGLDGGPSLASSDSSTSSAYMTYSESSSSNK\n+RNSSEMNFGKAENSSVFDASYQNFCFGVGHVNYHYQ\n+>gnl_Glyma1.01_PACid_16301083\n+MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG\n+VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK\n+HNSSEMNFGKAENSSVFDAGYQNFCFGVNQLQDIKKKKGGILGGGRSRHRNGRKQNMSYGHDVSSNDYPGISTK\n+>gnl_Glyma1.01_PACid_16301085\n+MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG\n+VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK\n+HNSSEMNFGKAENSSVFDAGYQNFCFGVGHVNYHYQ\n+>gnl_Glyma1.01_PACid_16301084\n+MADEGNKSNNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATGNSELVEEAKKKFQEIREAYSVLSDANKRLMYDVG\n+VYDSDDDENGMGDFLDEMLTMMSQTKSNENGEESFEELQQLFEDMFEADIGLDGGPSLASSDCSTSSAYMTYSESSSSNK\n+HNSSEMNFGKAENSSVFDAGYQNFCFGTGEPTPRYKEEKGGNSRRR\n+>gnl_Medtr3.5_Medtr8g022310.1\n+MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG\n+VYDSDDDENGMGDFLNEMVTMMSQTKSNENGEESFEELQQLFDDMFQADIGLNGSTSLNASGCSTSSTFMTFSESSNSNK\n+RNSTQMNFGKAEDSSSFGANYQNFCFGMKHLQEDVEKEKGGILEGGGSKKQRKGRKQKISCGHVSSNDHPGISAN\n+>gnl_Medtr3.5_Medtr8g022310.2\n+MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG\n+VYDSDDDENGMGDFLNEMVTMMSQTKSNENGEESFEELQQLFDDMFQADIGLNGSTSLNASGCSTSSTFMTFSESSNS
NK\n+RNSTQMNFGKAEDSSSFGANYQNFCFGVNLVNYHYQ\n+>gnl_Musac1.0_GSMUA_Achr6T31040_001\n+MAAEEDKSGDFYAVLGLRKECSETELRNAYKKLAMRWHPDKCLASGNAQIVGEAKEKFQEIQKAYSVLSDSNKRFLYDVG\n+VYDNDDDNDENGMGDFIGEMLEMMSQTKPNENSQDSFQELQELFVEMFQDDLDAGFGGSIFHDCPWAQPTNGQDCWTSSG\n+LHFANGRSKCGNKRGNSAVNLGKVNLEELEHGTSDFYFGLNDAAQPSQGKGGSNNKRRNGRKQKVSSNHDVSS\n+>gnl_Musac1.0_GSMUA_Achr9T18140_001\n+MAAGEEKIGDFYTVLGLRKECSEAELRIAYKKLAMRWHPDKCSASGNHRRMEEAKEKFQEIQKAYSVLSDSSKRFLYDVG\n+IYDNEDDNDEKGMGDFIGEIAQMMSQTKSGENGHDSFEELQRMFLDMFQDDLDAGFGDSSIHSGPQARPTDGLNCSMPSG\n+LQFADGGNNGSNKRGNSEKAKLDGLENSSTGFCFGLNDAGQSSKGKGSANSKRRNGRKQKVSSKHDVSSSDAEVSF\n+>gnl_Musac1.0_GSMUA_Achr8T23700_001\n+MASDMDASGDFYSVLGLKKECSEAELRNAYKKLALKWHPDKCSASGNEIRMKEAKQQFQEIQKAYSVLSDSNKRFLYDVG\n+AYDKDDDKDEEGMVEFLGEMAQMMRQTKCCGSGQESFEQLQQMFVEMFHDDLDAGFCGHSSATSGAASCGNKRDNSAMDS\n+GKRKPDELDPAAIGFCLGTKDAGQSSKGRGSNSKRRNRRKQKASSKHDNSSHNAKVSA\n+>gnl_Musac1.0_GSMUA_AchrUn_randomT02210_001\n+MEGDEEKSGDFYAVLGLKKEGSMAELKNAYKKLAMKWHPDKCPASGNKIRMDKAKEKFQEIQKAYSVLSDSNKRFLYDVG\n+VYDKDDEEDEEGMGDFIGEIAQMMSQSKPSGSGHESLEELHRQVVEMFLDELDAGDRFSSANQGASSCDGRDDGGGNKRG\n+NWAVDWGKEKLNELGPGTGGFCFGVSRRVHSFDLMIDVVHLIHSDLTLE\n+>gnl_Orysa6.0_PACid_16843526\n+MADGGEKCRDAAGEGGGGGDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSSSAKHMEEAKEKFQEIQGAYSVLSD\n+SNKRFLYDVGVYDDDDNDDDNLQGMGDFIGEMAQMMSQARPTRQESFKELQQLFVDMFQADLDSGFCNGPSKCYHTQAQS\n+QTRTSSTSPSMSPSPPPPVATEAESPSCNGINKRGSSAMDSGKPPRASEVGSGQSQSGFCFGKSDAKQAAKTRSGNTASR\n+RRNGRKQKVSSKHDVSSEDEMPGSQWHGVA\n+>gnl_Orysa6.0_PACid_16843528\n+MADGGEKCRDAAGEGGGGGDLYAVLGLKKECSDADLKLAYRKLAMRWHPDKCSSSSSAKHMEEAKEKFQEIQGAYSVLSD\n+SNKRFLYDVGVYDDDDNDDDNLQGMGDFIGEMAQMMSQARPTRQESFKELQQLFVDMFQADLDSGFCNGPSKCYHTQAQS\n+QTRTSSTSPSMSPSPPPPVATEAESPSCNGINKRGSSAMDSGKPPRASEVGSGQSQSGFCFGQKSDAKQAAKTRSGNTAS\n+RRRNGRKQKVSSKHDVSSEDEMPGSQWHGVA\n+>gnl_Poptr2.2_PACid_18217800\n+MANGGEDKWKSNDLYQVLGLNKECTDTELRSAYKKLALRWHPDRCSASGNSKFVEEAKKKFQAIQQAYSVLSDTNKRFLY\n+DVGVDDSDDDENGMGDFLNEMAVMMSQTKPSENMEESLEELQELFDEMFQEDLHSFGIDSQAAPSCPPSYVSYSESSNSN\n+NKRVSADMNLGKTKVDDSSSFNSHFEKFCLGTGGTAATFQ
EGEGGSKRRNSRRSQRQTKARQETKSFFGL\n+>gnl_Poptr2.2_PACid_18234651\n+M'..b'SFSLGVDHQQEFKKGKNNGGRRNRRKNNVPSAGHETSSSNNYGVPTS\n+>gnl_Bradi1.2_Bradi3g60090.1\n+MATGGDKCGGKPAAAGVGGGDLYSVLGVNKECSDADLKVAYRKLAMRWHPDRCSSSSSTKHMEEAKEKFQEIQGAYSVLS\n+DANKRFLYDVGVYEEHEEEDDDTLQGMGDFLGEMAHMMSQTQPARQESFEELQQLFVDMFQSDIESGFCNGPAKDHDPVQ\n+RQTRTFSTPPSPSPSPPPPLATVDEAASCNGINKRGSSAMGSGKPPRAGEVSGGHGQSEFCFGMSDAKQAPKARGGNASR\n+RRNGQKQKLSSKHDVSSGDEMPRPHAAV\n+>gnl_Carpa1.181_PACid_16420351\n+MADGEDKNNSDLYAVLGLNKECTPAELRNAYKKLAMRWHPDRCSASGNSMFVEEAKKKFQAIQEAYSVLSDANKRFLYDV\n+GAYESDDDENGMGDFLNEMAAMMSQTKPNENGNAQESFEELQELFQEMFQGDMGFNTFGSSSQPTTSSCSASSAYATCSE\n+TSNPNNNKRNSSEMNYGKKKVDDSSGFHAHFQTFCLGVEQQQDFKKGKEARGGIRGKPGGSRRQGRKQKVSSRHNVSSND\n+LGISAS\n+>gnl_Frave2.0_gene05408\n+MAGGKWVPPPLSQFHLHIKRFRRRQKVSGSSGNTEPSGYTDCKNISNRRMEEKGNDFYAVMGLKKECSDSELRNAYKKLA\n+LIWHPDRCSASGNSKFVEEAKKKFQDIQQAYSVLSDANKRFLYDVGAYESDDDENGMGDFLNEMAVMMSQTKPNENGGES\n+FEQLQELFEEMFQGDIEGFSSCSQPPTSCSTSSSSYALYCENSTPSNKRNSSAMNYGNATLDSSGFDAHFHNFCVGTGGK\n+PAKDREGDARKRKDSRRSNR\n+>gnl_Mimgu1.0_PACid_17694730\n+MAADEEKSSDFYGVLGLRKECTAAELRVAYKKLAMKWHPDRCSASGNLKYVEEAKNKFQAVQQAYSVLSDANKRFLYDVG\n+IYDSEDDADENGMGDFLNEMVAMMGQSKPNENKNESFQELQDLFEEIFNNDAEEVFKIPPPHFPYQDSCSETRTASNKRN\n+AREMGSVNFSNIEATPFEGFCIGENVIFGGERIQTRPGGGSRRTKPKISTSIDGLIS\n+>gnl_Nelnu1.0_NNU_010544-RA\n+MNLLLQKWHPDRCSSSGNSKFVEDSKKKFQAIQEAYSVLSDENKRFLYDVGVYDCDDDDDDENGMGEFLGEMATMMSQIK\n+PSENGPESLEKLQELFEEMFQRDMDDGFFSPSPQCASFSSSCSSSSSSTTYFSYNNNKHDNKRNCSDISSMDDFYTFGTD\n+SIQFSNFCIGVEGGEDSKVRGGKSRRKSNRRQKVSSSKHDPSCR\n+>gnl_Solly2.3_Solyc03g123560.2.1\n+MEDKSNDYYAVLGLKKECTDTELRNAYKKLALKWHPDRCSASGNLKFVDEAKKQFQAIQEAYSVLSDANKKFLYDVGVYD\n+SGDDDDENGMGDFLNEMAAMMSQNKSNENQGEETFEELQDMFNEMFNSDNGTFSSSSSSSSSWTGTPSMCSTTSSTSSSE\n+TFLTFPNKRSSGEMKSGSSVRGDSCQFQGFCVGAGGTSGKCNERERSWRKNSKSGRKH\n+>gnl_Sorbi1.4_PACid_1968370\n+MDAGGEKFSDAAAAEGGEGGGDLYAVLGLKKECSDADLKVAYRKLAKKWHPDKCSSSSSVKHMEEAKEKFQEIQGAYSVL\n+SDANKRLLYDVGVYDDEDDEDSMQGMGDFIGEMAQMMSQVRPTRQESFEELQQLFVDMFQSDIDSGFCNGSAKDQVQGQA\n+KSRTCSTSPSSSPSPPPPPTIV
KEAEVSSCNGFNKRGSSAMDSGKPPRPVEGGAGQAGFCFGVSDTKQTPKPRGPNTSRR\n+RNGRKQKLSSKHDVSSEDETAGS\n+>gnl_Thepa2.0_Tp3g12470\n+MASNNSEKGNDDLYGVLGLKKECTTTELRTAYKKLALRWHPDRCSSMGTPEFVDEAKKKFQAIQEAYSVLSDSNKRFLYD\n+VGAYNSDDEDQNGMGDFLNEMAAMMNQSKPSENNSGDSFEQLQDLFNEMFQGDAAAFSSSSSSSCSASTFTSSCSFVFDT\n+NSQRSPFETSSMGTNDLFGFDHSAHTFSLGVEHQQDFKKGKNSGGRRNRRKNNAQSAAHETASSNNYGVPTS\n+>gnl_Theca1.0_Tc06_g010450\n+MANGEEKNNDFYAVLGLNKECTPTELRTAYKKLALRWHPDRCSASGNSKFVEEAKKKFQAIQQAYSVLSDSNKRFLYDVG\n+AYDSDDDENGMGDFLNEMAGMMSQTKSNENGGESFEELQELFEEMFQADIDSFESTGQSTPSCSASSSFGSYGESSSSNK\n+RNSSEMSSVETRLESSSSFDAQFHSFCLGVEHRQDIKQHRGARGGMRGAAGGSRRRNGRKQKVSSGHDVTSNDCGISAS\n+>gnl_Vitvi12X_PACid_17827068\n+MAAGEEKSNDFYAVLGLKKECTASELRNAYKRLALMWHPDRCSSSGNSKFVEEAKKKFQAIQEAYSVLSDANKRFLYDVG\n+AYDSDDDENGMGDFLNEMAVMMSQTKSNENGKESFEELQELFEDMFQRDVDAFNSASHHPMNSFPSSTSTSSYCESSNAN\n+NKRNSAEMGSGRMMSAGESSAFDAHFQSFCFGTGGTPGRFQEGERSKRRNSRRSQR\n+>gnl_Selmo1.0_PACid_15401289\n+MEKRKEDPYTVLGVQKSSSSSEIRSAYRKLAMKWHPDKQHSLEDQAKAKFQGIQEAYSVLSDDKKRVLYDSGLYDEGDDE\n+VS\n+>gnl_Orysa6.0_PACid_16864430\n+MARGGGGGGGADADLYAVLGLSRECTDADLRLAYRKLAMIWHPDRCSVAGGSASAAGVDEAKERFQEIQGAYSVLSDSNK\n+RFLYDVGVYDGNDGDDDDDEADLSGMGDFLGEMAQMMSQATPAESFEELQQLFVDMFQDDIDAGLCQSTPPPPSWPSPPA\n+AANARSPAAAATSRKGVNKRCSPAAMDMDSGLSSLLGISGFCFEAPWTSQDASTAAGGGGGKRRKQRPPPASHNV\n+>gnl_Sorbi1.4_PACid_1982925\n+MAATSHCGNIQDQDEEASAPGAADLYAVLGLNRECTDAELRVAYRRLAMIWHPDRCSASGSSPARMEEAKERFQEIQGAY\n+SVLSDSNKRLLYDVGVYDSDDDEADLSGMGDFLGEMADMMSQATPTETFEELQQVFVDMFQDDLDDAGFFGGLPTTGRRA\n+QAPSTSLPPSVSSSPLRPTPAAGRSKGPQATPSSSFKGVERRGSTSTAKRPRPNGSAGLESDLGLSGFCFMVSKEMSKSK\n+ERQAVWASDDGDRSTDGKQRLSTSRDVSGGGMSRSLQGQSSKNLLQCMASKS\n+>gnl_Medtr3.5_Medtr8g022310.3\n+MANEGNKSNDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASGNVKFVEEAKKKFQAIQEAYSVLSDSNKRLMYDVG\n+VYDSDDDENVRHLFHTIHELGTLFCVMFCFFISLRGEKRSNLNLTFSLSH\n+>gnl_Nelnu1.0_NNU_000115-RA\n+MEVDSHRSSPSYYTILGVDQNSSASEIRNAYRKLAMQWHPDKWTKTPSLLEKAKSKFQQIQEAYSGGLLVFMLSDQGKRT\n+LYDVGLYDPDDETNDEVGLRRFHAGDDISHERCEETGEEIQLGGTTGDVSGNVTRAGVEDGECWWCVVVRWSCSLKEELK\n+EGQMGIISESDDAGHDTPSLP
HLHGSELELLGRTGCCN\n+>contig_7\n+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG\n+NESGEISGKKNTRKGKGDX\n'
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/3722.faa.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.faa.aln Mon Oct 30 09:52:00 2017 -0400
b
b'@@ -0,0 +1,312 @@\n+>gnl_Glyma1.01_PACid_16266208\n+---MANE-------------------------------------------------GKKS\n+NNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATG---NLELVEEAKKKFQEIREAY\n+S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSHTKSNENG\n+--EESFEELQ------QLFEDMFQADI--GL----------DGGPSL-------------\n+----ASSDSSTSSA------------------YMTYSESSS---------SNKRNSS-EM\n+NFGK---AENSSVFDASY------QNFCFG-VN--QLQDIKKGK---GGILGGGGRSRHR\n+SGRKQKMFY-GHDV--------------------------\n+>gnl_Glyma1.01_PACid_16266209\n+---MANE-------------------------------------------------GKKS\n+NNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATG---NLELVEEAKKKFQEIREAY\n+S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSHTKSNENG\n+--EESFEELQ------QLFEDMFQADI--GL----------DGGPSL-------------\n+----ASSDSSTSSA------------------YMTYSESSS---------SNKRNSS-EM\n+NFGK---AENSSVFDASY------QNFCFG-TG--EPTPRYKEG--------KGGNSRRR\n+R---------------------------------------\n+>gnl_Glyma1.01_PACid_16266210\n+---MANE-------------------------------------------------GKKS\n+NNFYSILGLSKECTELELKNAYRKLAKKWHPDRCSATG---NLELVEEAKKKFQEIREAY\n+S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSHTKSNENG\n+--EESFEELQ------QLFEDMFQADI--GL----------DGGPSL-------------\n+----ASSDSSTSSA------------------YMTYSESSS---------SNKRNSS-EM\n+NFGK---AENSSVFDASY------QNFCFG-VG--HVNYHYQ------------------\n+----------------------------------------\n+>gnl_Glyma1.01_PACid_16301083\n+---MADE-------------------------------------------------GNKS\n+NNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATG---NSELVEEAKKKFQEIREAY\n+S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSQTKSNENG\n+--EESFEELQ------QLFEDMFEADI--GL----------DGGPSL-------------\n+----ASSDCSTSSA------------------YMTYSESSS---------SNKHNSS-EM\n+NFGK---AENSSVFDAGY------QNFCFG-VN--QLQDIKKKK---GGIL-GGGRSRHR\n+NGRKQNMSY-GHDVSSNDYPGISTK---------------\n+>gnl_Glyma1.01_PACid_16301085\n+---MADE-------------------------------------------------GNKS\n+NNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATG---NSELVEEAKKKFQEIREAY\n+S------V
LSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSQTKSNENG\n+--EESFEELQ------QLFEDMFEADI--GL----------DGGPSL-------------\n+----ASSDCSTSSA------------------YMTYSESSS---------SNKHNSS-EM\n+NFGK---AENSSVFDAGY------QNFCFG-VG--HVNYHYQ------------------\n+----------------------------------------\n+>gnl_Glyma1.01_PACid_16301084\n+---MADE-------------------------------------------------GNKS\n+NNFYSILGLKKECTELELKNAYRKLAKKWHPDRCSATG---NSELVEEAKKKFQEIREAY\n+S------VLSDANKRLMYDVGVYDS--------DDDENGMGDFLDEMLTMMSQTKSNENG\n+--EESFEELQ------QLFEDMFEADI--GL----------DGGPSL-------------\n+----ASSDCSTSSA------------------YMTYSESSS---------SNKHNSS-EM\n+NFGK---AENSSVFDAGY------QNFCFG-TG--EPTPRYKEE--------KGGNSRRR\n+----------------------------------------\n+>gnl_Medtr3.5_Medtr8g022310.1\n+---MANE-------------------------------------------------GNKS\n+NDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASG---NVKFVEEAKKKFQAIQEAY\n+S------VLSDSNKRLMYDVGVYDS--------DDDENGMGDFLNEMVTMMSQTKSNENG\n+--EESFEELQ------QLFDDMFQADI--GL----------NGSTSL-------------\n+----NASGCSTSST------------------FMTFSESSN---------SNKRNST-QM\n+NFGK---AEDSSSFGANY------QNFCFG-MK--HLQEDVEKE--KGGILEGGGSKKQR\n+KGRKQKISC-GH-VSSNDHPGISAN---------------\n+>gnl_Medtr3.5_Medtr8g022310.2\n+---MANE-------------------------------------------------GNKS\n+NDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASG---NVKFVEEAKKKFQAIQEAY\n+S------VLSDSNKRLMYDVGVYDS--------DDDENGMGDFLNEMVTMMSQTKSNENG\n+--EESFEELQ------QLFDDMFQADI--GL----------NGSTSL-------------\n+----NASGCSTSST------------------FMTFSESSN---------SNKRNST-QM\n+NFGK---AEDSSSFGANY------QNFCFG-VN--LVNYHYQ------------------\n+----------------------------------------\n+>gnl_Musac1.0_GSMUA_Achr6T31040_001\n+---MAAE-------------------------------------------------EDKS\n+GDFYAVLGLRKECSETELRNAYKKLAMRWHPDKCLASG---NAQIVGEAKEKFQEIQKAY\n+S------VLSDSNKRFLYDVGVYDND------DDNDENGMGDFIGEMLEMMSQTKPNENS\n+--QDSFQELQ------ELFVEMFQDDL--DAG---------FGGSIFH-DCPWAQP----\n+---TNGQDCWTSSG------------------LHFANGRSK--------CGNKRGNS-AV\n+N
LGK----VNLEELEHGT------SDFYFG-LN--DAAQPSQGKGGS--------NNKRR\n+NGRKQKVSS-N'..b'HDVSSEDETAGS-----------------\n+>gnl_Thepa2.0_Tp3g12470\n+---MASNN-----------------------------------------------SEKGN\n+DDLYGVLGLKKECTTTELRTAYKKLALRWHPDRCSSMG---TPEFVDEAKKKFQAIQEAY\n+S------VLSDSNKRFLYDVGAYNS-------DDEDQNGMGDFLNEMAAMMNQSKPSENN\n+-SGDSFEQLQ------DLFNEMFQGDA--AA----------FSSSSS-------------\n+------SSCSASTF------------------TSSCSFVFD--------TNSQRSPF-ET\n+SSMG---TNDLFGFDHSA------HTFSLG-VE--HQQDFKKGKN----------SGGRR\n+NRRKNNAQSAAHETASSNNYGVPTS---------------\n+>gnl_Theca1.0_Tc06_g010450\n+---MANG-------------------------------------------------EEKN\n+NDFYAVLGLNKECTPTELRTAYKKLALRWHPDRCSASG---NSKFVEEAKKKFQAIQQAY\n+S------VLSDSNKRFLYDVGAYDS--------DDDENGMGDFLNEMAGMMSQTKSNENG\n+--GESFEELQ------ELFEEMFQADI--DS----------FESTGQ-------------\n+----STPSCSASSS------------------FGSYGESSS---------SNKRNSS-EM\n+SSVE-TRLESSSSFDAQF------HSFCLG-VE--HRQDIKQHRGARGGMRGAAGGSRRR\n+NGRKQKVSS-GHDVTSND-CGISAS---------------\n+>gnl_Vitvi12X_PACid_17827068\n+---MAAG-------------------------------------------------EEKS\n+NDFYAVLGLKKECTASELRNAYKRLALMWHPDRCSSSG---NSKFVEEAKKKFQAIQEAY\n+S------VLSDANKRFLYDVGAYDS--------DDDENGMGDFLNEMAVMMSQTKSNENG\n+--KESFEELQ------ELFEDMFQRDV--DA----------FNSASHH------------\n+----PMNSFPSSTS------------------TSSYCESSN--------ANNKRNSA-EM\n+GSGRMMSAGESSAFDAHF------QSFCFG-T-----------GGTPGRFQEGE-RSKRR\n+NSRRSQR---------------------------------\n+>gnl_Selmo1.0_PACid_15401289\n+---M----------------------------------------------------EKRK\n+EDPYTVLGVQKSSSSSEIRSAYRKLAMKWHPDK--------QHSLEDQAKAKFQGIQEAY\n+S------VLSDDKKRVLYDSGLYDEG------DDE-------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+-------------------------------VS---------------------------\n+----------------------------------------\n+>gnl_Orysa6.0_PACid_16864430\n+---MARGGG-----------------------------------G----------GGG
AD\n+ADLYAVLGLSRECTDADLRLAYRKLAMIWHPDRCSVAGGSASAAGVDEAKERFQEIQGAY\n+S------VLSDSNKRFLYDVGVYDGNDGDDDDDEADLSGMGDFLGEMAQMMSQATP----\n+--AESFEELQ------QLFVDMFQDDI--DAG---------LCQSTP-------------\n+----PPPSWPSPPA-------------------AANARSPAAAAT-SRKGVNKRCSP-AA\n+MDMD-----SGLSSLLGI------SGFCFE-AP--WTSQDASTAAGGG-------GGKRR\n+KQRPPPA---SHNV--------------------------\n+>gnl_Sorbi1.4_PACid_1982925\n+---MAATSH-----------------------------------CGNIQDQDEEASAPGA\n+ADLYAVLGLNRECTDAELRVAYRRLAMIWHPDRCSASG--SSPARMEEAKERFQEIQGAY\n+S------VLSDSNKRLLYDVGVYDSDD-----DEADLSGMGDFLGEMADMMSQATP----\n+--TETFEELQ------QVFVDMFQDDLD-DAG---------FFGGLPT-TGRRAQA--PS\n+TSLPPSVSSSPLRP----------------TPAAGRSKGPQATPSSSFKGVERRGST-ST\n+AKRPRPNGSAGLESDLGL------SGFCFM-VS--KEMSKSKERQAV---WASD-DGDRS\n+TDGKQRLST-SRDVSGGG-MSRSLQGQSSKNLLQCMASKS\n+>gnl_Medtr3.5_Medtr8g022310.3\n+---MANE-------------------------------------------------GNKS\n+NDFYAVLGLNKECSDSELRNAYKKLALKWHPDRCSASG---NVKFVEEAKKKFQAIQEAY\n+S------VLSDSNKRLMYDVGVYDS--------DDDENVRHLF-----------------\n+---HTIHELG------TLFCVMFCFFISLR------------GE----------------\n+---------KRSNL------------------NLTFSLSH--------------------\n+------------------------------------------------------------\n+----------------------------------------\n+>gnl_Nelnu1.0_NNU_000115-RA\n+---MEVD------------------------------------------------SHRSS\n+PSYYTILGVDQNSSASEIRNAYRKLAMQWHPDKWTKTP-----SLLEKAKSKFQQIQEAY\n+SGGLLVFMLSDQGKRTLYDVGLYDPDD-----ETNDEVGLRRF--HAGDDISHERC----\n+--EETGEEIQLGGTTGDVSGNVTRAGV--EDGECWWCVVVRWSCSLKE------------\n+----------------------------------------------------------EL\n+KEGQ--------------------MGIISE-SD--DA-----------------------\n+----------GHDTPSLPHLHGSEL-----ELLGRTGCCN\n+>contig_7\n+------------------------------------------------------------\n+------------------------------------------------------------\n+----------------------------------ENEWSGAEFLNEMAAMMTQNKSNENG\n+--TGTFEELQ------QLFDEMFQSDI--ES----------FNGCSSS------------\n+----SNETCS--------
-------------------------------NSNKRNSI-ES\n+SSAN----------------FRPENGNESG-----------------------E-ISGKK\n+NTRKGKGDX-------------------------------\n'
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/3722.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.fna Mon Oct 30 09:52:00 2017 -0400
b
b'@@ -0,0 +1,378 @@\n+>gnl_Glyma1.01_PACid_16266208\n+ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA\n+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG\n+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA\n+GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC\n+AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG\n+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA\n+CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG\n+GGTGAACCAACTCCAAGATATAAAGAAGGGAAAGGGGGGAATTCTAGGAGGAGGAGGTAGAAGTAGACACAGAAGTGGCA\n+GAAAGCAAAAAATGTTCTATGGCCATGATGTT\n+>gnl_Glyma1.01_PACid_16266209\n+ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA\n+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG\n+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA\n+GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC\n+AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG\n+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA\n+CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG\n+GACAGGTGAACCAACTCCAAGATATAAAGAAGGGAAAGGGGGGAATTCTAGGAGGAGGAGG\n+>gnl_Glyma1.01_PACid_16266210\n+ATGGCTAATGAAGGAAAGAAAAGCAATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAA\n+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGGAATTTAGAGTTAGTGGAAGAAG\n+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCTGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA\n+GTCTACGACAGTGATGACGACGAAAACGGCATGGGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATC\n+AAATGAAAATGGAGAGGAGAGCTTTGAGGAGTTGCAACAGCTTTTTGAAGACATGTTTCAAGCGGATATTGGATTGGATG\n+GAGGCCCTTCTCTTGCTTCTTCTGATTCCTCAACTTCATCTGCTTACATGACTTACAGTGAAAGTTCTAGTTCAAATAAA\n+CGCAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATT
CTTCTGTCTTTGATGCCAGTTACCAGAATTTCTGTTTTGG\n+GGTCGGTCATGTAAACTATCATTACCAA\n+>gnl_Glyma1.01_PACid_16301083\n+ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA\n+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG\n+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA\n+GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC\n+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG\n+GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA\n+CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG\n+GGTGAACCAACTCCAAGATATAAAGAAGAAAAAGGGGGGAATTCTAGGAGGAGGTAGAAGTAGACACAGAAATGGCAGAA\n+AGCAAAATATGTCCTATGGCCATGATGTTTCATCGAATGACTACCCTGGAATTTCCACAAAG\n+>gnl_Glyma1.01_PACid_16301085\n+ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA\n+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG\n+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA\n+GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC\n+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG\n+GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCAAATAAA\n+CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGTCTTTGATGCTGGTTACCAGAATTTCTGTTTTGG\n+GGTAGGTCATGTAAACTATCATTACCAA\n+>gnl_Glyma1.01_PACid_16301084\n+ATGGCCGATGAAGGAAACAAAAGCAATAACTTCTATTCGATCTTGGGGTTGAAGAAGGAGTGCACTGAATTGGAGCTAAA\n+GAATGCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCGACCGGGAATTCAGAGTTAGTGGAAGAAG\n+CTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTATTCAGTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTGGGA\n+GTCTACGACAGTGATGACGACGAGAACGGCATGGGGGACTTCTTGGACGAAATGCTAACAATGATGAGTCAGACCAAATC\n+GAATGAAAACGGAGAGGAGAGCTTTGAGGAGTTGCAACAGTTGTTTGAAGACATGTTTGAAGCAGATATTGGATTGGACG\n+GAGGCCCTTCTCTTGCTTCTTCTGATTGCTCAACTTCATCTGCTTACATGACTTATAGTGAAAGTTCTAGTTCA
AATAAA\n+CACAATTCCTCTGAGATGAATTTCGGGAAGGCAGAGAATTCTTCTGT'..b'AGAAGAGCAATGATTTTTATGCCGTTCTAGGGTTGAAAAAGGAATGCACCGCCTCCGAGCTCAG\n+AAATGCGTACAAGAGACTTGCCCTGATGTGGCACCCAGATCGTTGCTCCTCGTCGGGAAACTCGAAATTCGTGGAAGAAG\n+CGAAGAAGAAATTTCAGGCCATACAAGAAGCCTATTCAGTTCTCTCTGATGCGAATAAAAGGTTTCTGTACGACGTTGGA\n+GCCTACGACAGCGATGATGACGAAAACGGAATGGGGGATTTTTTGAATGAGATGGCGGTTATGATGAGCCAAACCAAGTC\n+CAATGAAAATGGGAAGGAGAGCTTTGAGGAGTTGCAGGAGCTCTTTGAGGATATGTTCCAAAGGGATGTCGACGCATTCA\n+ACTCTGCCTCTCATCACCCCATGAACTCTTTCCCCAGTTCTACTTCCACTTCTTCCTACTGCGAAAGCTCCAATGCCAAC\n+AACAAGCGGAATTCGGCTGAAATGGGCTCTGGAAGGATGATGAGTGCAGGGGAGTCCTCTGCTTTTGATGCCCACTTTCA\n+GAGCTTCTGCTTTGGGACAGGCGGCACGCCAGGGAGATTTCAGGAGGGGGAAAGGAGCAAGAGGAGGAATTCCAGGAGGA\n+GCCAACGG\n+>gnl_Selmo1.0_PACid_15401289\n+ATGGAGAAGAGGAAAGAGGATCCCTACACTGTTCTTGGTGTCCAAAAGTCGAGTTCTAGCTCGGAAATTCGCTCCGCTTA\n+TCGGAAGCTCGCCATGAAATGGCATCCAGATAAGCAACACTCTTTAGAGGATCAAGCAAAAGCGAAGTTCCAGGGCATTC\n+AAGAAGCTTATTCAGTGCTATCCGACGACAAAAAAAGAGTTCTTTATGATTCGGGACTTTATGACGAGGGAGATGACGAG\n+GTGAGT\n+>gnl_Orysa6.0_PACid_16864430\n+ATGGCCCGCGGCGGCGGCGGCGGCGGCGGCGCGGACGCCGACCTGTACGCCGTCCTCGGCCTCAGCAGGGAGTGCACCGA\n+CGCCGACCTCAGGCTCGCCTACCGCAAGCTCGCCATGATATGGCATCCGGACAGGTGCTCGGTGGCCGGCGGCAGCGCGA\n+GCGCGGCGGGCGTCGACGAGGCCAAGGAGCGATTCCAGGAGATCCAGGGCGCCTACTCCGTGCTCTCCGACTCCAACAAG\n+CGCTTCCTCTACGACGTCGGCGTCTACGACGGCAACGACGGCGACGACGACGACGACGAAGCAGATCTGTCGGGGATGGG\n+CGATTTCCTCGGCGAGATGGCGCAGATGATGAGCCAGGCGACGCCTGCGGAGAGCTTCGAGGAGTTGCAGCAGCTGTTCG\n+TGGACATGTTCCAGGACGACATCGACGCCGGCCTCTGCCAGTCGACGCCGCCGCCGCCGTCATGGCCGTCGCCTCCGGCG\n+GCCGCCAATGCACGATCGCCGGCGGCGGCGGCGACTTCACGCAAGGGCGTGAACAAGCGGTGCTCACCGGCGGCGATGGA\n+CATGGACTCCGGTTTGAGCAGCCTGCTGGGCATTTCGGGCTTCTGTTTCGAGGCGCCATGGACGTCGCAGGACGCGAGCA\n+CTGCCGCCGGCGGTGGCGGCGGCAAGAGGAGAAAGCAGAGGCCGCCGCCGGCGAGCCACAACGTG\n+>gnl_Sorbi1.4_PACid_1982925\n+ATGGCTGCTACAAGTCACTGCGGCAACATCCAGGACCAGGACGAAGAAGCTTCGGCTCCTGGCGCCGCCGACCTCTACGC\n+CGTGCTCGGGCTCAACAGGGAGTGCACCGACGCCGAGCTCAGGGTCGCGTACCGGCGGCTCGCCATGATATGGCATCCGG\n+ACAGGTGCTCGGCGTCCGGCAGCTCGCCGGCG
CGCATGGAGGAGGCCAAGGAGCGGTTCCAGGAGATCCAGGGCGCCTAC\n+TCCGTGCTCTCCGACTCCAACAAGCGGCTCCTCTACGACGTCGGCGTCTACGACAGCGACGACGACGAGGCTGACCTGTC\n+GGGGATGGGCGACTTCCTCGGAGAGATGGCCGACATGATGAGCCAGGCCACGCCAACGGAGACCTTCGAGGAGCTGCAGC\n+AGGTGTTCGTGGACATGTTCCAGGACGACCTGGACGACGCCGGCTTCTTCGGCGGGCTTCCGACGACGGGCCGCAGGGCC\n+CAGGCACCCAGCACCTCGCTGCCGCCGTCGGTGTCGTCGTCGCCGTTGCGGCCGACGCCTGCCGCTGGAAGAAGCAAGGG\n+TCCGCAAGCGACGCCGTCGTCGTCGTTTAAAGGCGTCGAGAGGCGGGGTTCGACGTCGACGGCGAAACGGCCGAGGCCCA\n+ACGGGTCGGCGGGCCTGGAATCGGACCTGGGCCTCTCCGGATTCTGCTTCATGGTGAGTAAGGAGATGAGCAAGTCGAAG\n+GAGAGGCAAGCGGTATGGGCCAGTGACGACGGTGACAGGAGCACCGATGGCAAGCAGAGGTTGTCGACGAGCCGCGATGT\n+CTCCGGTGGTGGGATGTCACGCTCACTGCAGGGCCAAAGCAGCAAAAACTTGTTGCAGTGTATGGCCTCTAAGTCT\n+>gnl_Medtr3.5_Medtr8g022310.3\n+ATGGCTAACGAAGGAAACAAAAGCAATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAG\n+GAATGCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGGAATGTGAAGTTTGTGGAAGAAG\n+CTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTATTCTGTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTTGGA\n+GTTTACGACAGTGATGATGACGAAAATGTAAGGCACTTGTTTCACACCATTCATGAGTTGGGGACCCTCTTTTGCGTTAT\n+GTTTTGTTTCTTCATTTCCTTGAGGGGAGAGAAGAGAAGCAACCTTAATTTAACCTTTTCACTTTCACAT\n+>gnl_Nelnu1.0_NNU_000115-RA\n+ATGGAGGTGGACTCCCATCGATCATCTCCATCTTACTACACCATCCTTGGTGTAGATCAGAATTCCTCCGCTTCCGAGAT\n+ACGCAATGCTTACAGGAAGCTCGCGATGCAATGGCATCCAGACAAATGGACGAAAACTCCGTCGCTCTTAGAGAAAGCCA\n+AGAGTAAATTCCAGCAAATCCAGGAGGCTTATTCGGGTGGGTTACTCGTTTTCATGTTATCGGATCAGGGGAAGAGAACA\n+CTGTATGATGTCGGTCTGTATGACCCGGACGATGAAACGAATGACGAGGTGGGGCTTCGCAGATTTCATGCAGGAGATGA\n+TATCTCTCATGAACGATGTGAAGAAACAGGAGAAGAAATACAGCTTGGAGGAACTACAGGAGATGTTAGTGGAAATGTCA\n+CAAGGGCTGGAGTTGAAGATGGAGAGTGCTGGTGGTGTGTGGTTGTTAGATGGAGCTGCAGCCTCAAGGAGGAGCTCAAA\n+GAGGGCCAGATGGGAATCATCAGCGAGTCCGACGACGCTGGACACGACACACCTTCTCTCCCCCACTTGCACGGTTCAGA\n+GCTGGAATTGTTAGGAAGAACCGGCTGTTGCAAT\n+>contig_7\n+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC\n+CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT\n+CATCCAATGAAACATGTAGCAACTCG
AACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA\n+AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN\n'
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/3722.fna.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.fna.aln Mon Oct 30 09:52:00 2017 -0400
b
b'@@ -0,0 +1,819 @@\n+>gnl_Glyma1.01_PACid_16266208\n+---------ATGGCTAATGAA---------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------GGAAAGAAAAGC\n+AATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAAGAAT\n+GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGG------\n+---AATTTAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT\n+TCT------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG\n+GGAGTCTACGACAGT------------------------GATGACGACGAAAACGGCATG\n+GGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATCAAATGAAAATGGA\n+------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGCTTTTTGAA\n+GACATGTTTCAAGCGGATATT------GGATTG---------------------------\n+---GATGGAGGCCCTTCTCTT---------------------------------------\n+------------GCTTCTTCTGATTCCTCAACTTCATCTGCT------------------\n+------------------------------------TACATGACTTACAGTGAAAGTTCT\n+AGT---------------------------TCAAATAAACGCAATTCCTCT---GAGATG\n+AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCCAGTTAC------\n+------------CAGAATTTCTGTTTTGGG---GTGAAC------CAACTCCAAGATATA\n+AAGAAGGGAAAG---------GGGGGAATTCTAGGAGGAGGAGGTAGAAGTAGACACAGA\n+AGTGGCAGAAAGCAAAAAATGTTCTAT---GGCCATGATGTT------------------\n+------------------------------------------------------------\n+>gnl_Glyma1.01_PACid_16266209\n+---------ATGGCTAATGAA---------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------GGAAAGAAAAGC\n+AATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAAGAAT\n+GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGG------\n+---AATTTAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT\n+TCT------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG\n+GGAGTCTACGACAGT------------------------GATGACGACGAAAACGGCATG\n+GGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATCAAATGAAAATGGA\n+------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGCTTTTTGAA\n+GACATGTTTCAAGCGGATATT---
---GGATTG---------------------------\n+---GATGGAGGCCCTTCTCTT---------------------------------------\n+------------GCTTCTTCTGATTCCTCAACTTCATCTGCT------------------\n+------------------------------------TACATGACTTACAGTGAAAGTTCT\n+AGT---------------------------TCAAATAAACGCAATTCCTCT---GAGATG\n+AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCCAGTTAC------\n+------------CAGAATTTCTGTTTTGGG---ACAGGT------GAACCAACTCCAAGA\n+TATAAAGAAGGG------------------------AAAGGGGGGAATTCTAGGAGGAGG\n+AGG---------------------------------------------------------\n+------------------------------------------------------------\n+>gnl_Glyma1.01_PACid_16266210\n+---------ATGGCTAATGAA---------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------GGAAAGAAAAGC\n+AATAACTTCTATTCGATCTTGGGCTTGAGCAAGGAGTGCACTGAATTGGAGCTAAAGAAT\n+GCTTATAGGAAACTTGCAAAGAAATGGCACCCAGATCGTTGTTCAGCCACCGGG------\n+---AATTTAGAGTTAGTGGAAGAAGCTAAGAAAAAATTTCAGGAAATTCGGGAAGCCTAT\n+TCT------------------GTTTTATCTGACGCCAACAAAAGGTTAATGTACGACGTG\n+GGAGTCTACGACAGT------------------------GATGACGACGAAAACGGCATG\n+GGGGACTTCTTGGACGAAATGTTAACAATGATGAGTCATACCAAATCAAATGAAAATGGA\n+------GAGGAGAGCTTTGAGGAGTTGCAA------------------CAGCTTTTTGAA\n+GACATGTTTCAAGCGGATATT------GGATTG---------------------------\n+---GATGGAGGCCCTTCTCTT---------------------------------------\n+------------GCTTCTTCTGATTCCTCAACTTCATCTGCT------------------\n+------------------------------------TACATGACTTACAGTGAAAGTTCT\n+AGT---------------------------TCAAATAAACGCAATTCCTCT---GAGATG\n+AATTTCGGGAAG---------GCAGAGAATTCTTCTGTCTTTGATGCCAGTTAC------\n+------------CAGAATTTCTGTTTTGGG---GTCGGT------CATGTAAACTATCAT\n+TACCAA------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+>gnl_Glyma1.01_PACid_16301083\n+---------ATGGCCGATGAA---------------------------------------\n+-------
-----------------------------------------------------\n+------------'..b'------AAGGAGATGAGCAAG\n+TCGAAGGAGAGGCAAGCGGTA---------TGGGCCAGTGAC---GACGGTGACAGGAGC\n+ACCGATGGCAAGCAGAGGTTGTCGACG---AGCCGCGATGTCTCCGGTGGTGGG---ATG\n+TCACGCTCACTGCAGGGCCAAAGCAGCAAAAACTTGTTGCAGTGTATGGCCTCTAAGTCT\n+>gnl_Medtr3.5_Medtr8g022310.3\n+---------ATGGCTAACGAA---------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------GGAAACAAAAGC\n+AATGATTTCTATGCAGTTTTGGGATTGAATAAGGAATGCTCTGATTCAGAGCTAAGGAAT\n+GCTTATAAGAAACTTGCACTGAAATGGCATCCAGATCGTTGTTCAGCTTCAGGG------\n+---AATGTGAAGTTTGTGGAAGAAGCTAAGAAGAAATTTCAGGCAATTCAAGAAGCCTAT\n+TCT------------------GTTTTATCTGACTCGAACAAGAGATTAATGTACGACGTT\n+GGAGTTTACGACAGT------------------------GATGATGACGAAAATGTAAGG\n+CACTTGTTT---------------------------------------------------\n+---------CACACCATTCATGAGTTGGGG------------------ACCCTCTTTTGC\n+GTTATGTTTTGTTTCTTCATTTCCTTGAGG------------------------------\n+------GGAGAG------------------------------------------------\n+---------------------------AAGAGAAGCAACCTT------------------\n+------------------------------------AATTTAACCTTTTCACTTTCACAT\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+>gnl_Nelnu1.0_NNU_000115-RA\n+---------ATGGAGGTGGAC---------------------------------------\n+------------------------------------------------------------\n+---------------------------------------------TCCCATCGATCATCT\n+CCATCTTACTACACCATCCTTGGTGTAGATCAGAATTCCTCCGCTTCCGAGATACGCAAT\n+GCTTACAGGAAGCTCGCGATGCAATGGCATCCAGACAAATGGACGAAAACTCCG------\n+---------TCGCTCTTAGAGAAAGCCAAGAGTAAATTCCAGCAAATCCAGGAGGCTTAT\n+TCGGGTGGGTTACT
CGTTTTCATGTTATCGGATCAGGGGAAGAGAACACTGTATGATGTC\n+GGTCTGTATGACCCGGACGAT---------------GAAACGAATGACGAGGTGGGGCTT\n+CGCAGATTT------CATGCAGGAGATGATATCTCTCATGAACGATGT------------\n+------GAAGAAACAGGAGAAGAAATACAGCTTGGAGGAACTACAGGAGATGTTAGTGGA\n+AATGTCACAAGGGCTGGAGTT------GAAGATGGAGAGTGCTGGTGGTGTGTGGTTGTT\n+AGATGGAGCTGCAGCCTCAAGGAG------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------GAGCTC\n+AAAGAGGGCCAG------------------------------------------------\n+------------ATGGGAATCATCAGCGAG---TCCGAC------GACGCT---------\n+------------------------------------------------------------\n+------------------------------GGACACGACACACCTTCTCTCCCCCACTTG\n+CACGGTTCAGAGCTG---------------GAATTGTTAGGAAGAACCGGCTGTTGCAAT\n+>contig_7\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------GAGAATGAGTGGTCTGGG\n+GCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGA\n+------ACCGGAACTTTTGAAGAACTGCAA------------------CAATTGTTCGAT\n+GAAATGTTTCAGAGCGACATC------GAGTCC---------------------------\n+---TTCAATGGTTGTTCTTCATCA------------------------------------\n+------------TCCAATGAAACATGTAGC------------------------------\n+------------------------------------------------------------\n+---------------------------AACTCGAACAAGAGGAATTCCATT---GAGTCG\n+AGCTCGGCTAAT------------------------------------------------\n+TTCAGACCCGAAAATGGAAACGAAAGCGGC------------------------------\n+---------------------------------------GAG---ATTA
GCGGGAAGAAG\n+AATACTAGGAAAGGTAAAGGTGACGNN---------------------------------\n+------------------------------------------------------------\n'
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/38889.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.faa Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,14 @@
+>gnl_Mimgu1.0_PACid_17675619
+MAETKTKVLKFDAEEDVAVALAKYTAYLSEKYIKEKNSFSVVLSGGTLIDTLRKLVEFPYKDSVDWSKWLIFWVDERVVP
+LDHEDSNYLLAYRGFLSKVPIPPSNIYAINDKKSPEGAADDYEERIKNLVEEKTLPISDSGFPKFDLMLLGMGPDGHVAS
+LFPSHNQRYEKKRWVTFITDSPKPPPPRITFTFPVINSASDIAMVVTGAELADTTKKALGNEKHTLPPLPCTEVSAEREL
+TWFLDKDAASKL
+>gnl_Solly2.3_Solyc06g053200.2.1
+MATQKGKKTVLKFDSEEDVSKALAKYTAELSEKFIKQKGSFTVVLSGGSLIDTMRKLVEPPYKDSIDWSKWWIFWVDERV
+VPLGHDDSNYKLASDGFLSKVPIPSSNIYAINDKESPEGAAADYEARLKQLIESKVLPLSAITGFPKFDLMLLGMGPDGH
+VASLFPLHPHRHEKERLVTFITDSPKPPPPRITFTFPVINSASEIAMVVTGAELAHMVDVALGNAPPPDGIPPPCTEVSA
+EEELTWFLDKDAASELQTSR
+>contig_2
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK
+WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS
+QLLNYVRFDD
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/38889.faa.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.faa.aln Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,18 @@
+>gnl_Mimgu1.0_PACid_17675619
+MAETKTK--VLKFDAEEDVAVALAKYTAYLSEKYIKEKNSFSVVLSGGTLIDTLRKLVEF
+PYKDSVDWSKWLIFWVDERVVPLDHEDSNYLLAYRGFLSKVPIPPSNIYAINDKKSPEGA
+ADDYEERIKNLVEEKTLPISD-SGFPKFDLMLLGMGPDGHVASLFPSHNQRYEKKRWVTF
+ITDSPKPPPPRITFTFPVINSASDIAMVVTGAELADTTKKALGNEKHTLP---PLPCTEV
+SAERELTWFLDKDAASKL--------
+>gnl_Solly2.3_Solyc06g053200.2.1
+MATQKGKKTVLKFDSEEDVSKALAKYTAELSEKFIKQKGSFTVVLSGGSLIDTMRKLVEP
+PYKDSIDWSKWWIFWVDERVVPLGHDDSNYKLASDGFLSKVPIPSSNIYAINDKESPEGA
+AADYEARLKQLIESKVLPLSAITGFPKFDLMLLGMGPDGHVASLFPLHPHRHEKERLVTF
+ITDSPKPPPPRITFTFPVINSASEIAMVVTGAELAHMVDVALGNAP--PPDGIPPPCTEV
+SAEEELTWFLDKDAASEL----QTSR
+>contig_2
+------------------------------------------------------------
+------------------------------------XLSKVPIPSNNIYAINDKKSPEDA
+ADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTF
+ITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEV
+SAEGEVTWFLDKDAASQLLNYVRFDD
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/38889.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.fna Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,30 @@
+>gnl_Mimgu1.0_PACid_17675619
+ATGGCCGAAACCAAAACAAAAGTACTGAAATTCGACGCGGAGGAAGATGTGGCCGTCGCTCTAGCAAAGTACACCGCCTA
+TCTCTCCGAAAAGTACATCAAGGAAAAGAATTCTTTCTCGGTGGTTCTCTCCGGCGGCACCCTAATAGATACACTCAGGA
+AACTAGTAGAGTTTCCATACAAGGATTCTGTGGATTGGTCGAAATGGCTGATATTTTGGGTTGACGAGAGAGTGGTTCCT
+CTTGATCATGAAGACAGCAACTACTTACTTGCATACCGTGGTTTTCTTTCAAAGGTACCTATTCCTCCAAGCAACATTTA
+CGCAATCAACGACAAGAAGTCTCCGGAAGGTGCAGCCGATGATTACGAGGAGCGTATCAAGAATCTGGTCGAGGAAAAAA
+CCCTACCTATTTCAGACAGTGGCTTCCCTAAATTCGACCTTATGCTTCTCGGAATGGGGCCCGATGGCCACGTGGCGTCT
+CTTTTCCCCTCTCACAATCAACGGTACGAAAAGAAACGGTGGGTGACATTCATAACTGACTCTCCCAAACCGCCGCCACC
+TAGGATCACTTTCACATTCCCAGTCATCAACTCTGCTTCGGACATTGCAATGGTGGTCACTGGTGCTGAGCTGGCGGATA
+CTACGAAGAAAGCATTGGGAAACGAGAAGCATACTCTTCCTCCTCTTCCTTGTACTGAAGTTTCGGCTGAGAGAGAGCTC
+ACTTGGTTCTTGGACAAAGATGCTGCTTCTAAACTG
+>gnl_Solly2.3_Solyc06g053200.2.1
+ATGGCAACCCAGAAAGGGAAGAAGACGGTGCTAAAATTCGACTCCGAAGAAGATGTATCAAAGGCACTTGCTAAATACAC
+TGCTGAGCTATCGGAAAAATTCATCAAACAAAAAGGTTCTTTCACTGTTGTGCTCTCTGGTGGTTCTCTTATCGATACCA
+TGAGGAAATTGGTAGAGCCGCCGTACAAAGACTCAATTGATTGGTCGAAATGGTGGATTTTTTGGGTAGACGAAAGAGTG
+GTTCCTCTAGGTCACGATGATAGCAATTATAAACTTGCTTCGGATGGGTTTCTTTCTAAGGTTCCGATCCCCTCTTCTAA
+CATTTATGCGATTAATGACAAGGAGTCACCTGAGGGTGCAGCTGCTGATTACGAAGCTCGTCTGAAACAATTGATTGAGA
+GCAAAGTTCTTCCGTTATCAGCAATTACTGGATTCCCCAAATTTGATCTTATGCTATTAGGTATGGGGCCAGATGGACAT
+GTAGCGTCTTTGTTTCCTTTGCATCCTCACCGCCACGAGAAGGAGCGGCTGGTCACCTTCATTACAGACTCACCAAAACC
+TCCTCCACCAAGGATTACTTTCACCTTTCCGGTAATTAATTCGGCTTCAGAGATAGCAATGGTGGTCACAGGAGCAGAGT
+TAGCTCATATGGTTGATGTCGCTTTGGGTAATGCGCCTCCTCCTGATGGAATTCCTCCCCCTTGTACTGAGGTTTCAGCT
+GAAGAGGAACTGACCTGGTTTTTAGACAAGGATGCTGCATCAGAACTACAGACCTCTAGA
+>contig_2
+NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA
+CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC
+GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC
+CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGAT
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/38889.fna.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.fna.aln Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,45 @@
+>gnl_Mimgu1.0_PACid_17675619
+ATGGCCGAAACCAAAACAAAA------GTACTGAAATTCGACGCGGAGGAAGATGTGGCC
+GTCGCTCTAGCAAAGTACACCGCCTATCTCTCCGAAAAGTACATCAAGGAAAAGAATTCT
+TTCTCGGTGGTTCTCTCCGGCGGCACCCTAATAGATACACTCAGGAAACTAGTAGAGTTT
+CCATACAAGGATTCTGTGGATTGGTCGAAATGGCTGATATTTTGGGTTGACGAGAGAGTG
+GTTCCTCTTGATCATGAAGACAGCAACTACTTACTTGCATACCGTGGTTTTCTTTCAAAG
+GTACCTATTCCTCCAAGCAACATTTACGCAATCAACGACAAGAAGTCTCCGGAAGGTGCA
+GCCGATGATTACGAGGAGCGTATCAAGAATCTGGTCGAGGAAAAAACCCTACCTATTTCA
+GAC---AGTGGCTTCCCTAAATTCGACCTTATGCTTCTCGGAATGGGGCCCGATGGCCAC
+GTGGCGTCTCTTTTCCCCTCTCACAATCAACGGTACGAAAAGAAACGGTGGGTGACATTC
+ATAACTGACTCTCCCAAACCGCCGCCACCTAGGATCACTTTCACATTCCCAGTCATCAAC
+TCTGCTTCGGACATTGCAATGGTGGTCACTGGTGCTGAGCTGGCGGATACTACGAAGAAA
+GCATTGGGAAACGAGAAGCATACTCTTCCT---------CCTCTTCCTTGTACTGAAGTT
+TCGGCTGAGAGAGAGCTCACTTGGTTCTTGGACAAAGATGCTGCTTCTAAACTG------
+------------------
+>gnl_Solly2.3_Solyc06g053200.2.1
+ATGGCAACCCAGAAAGGGAAGAAGACGGTGCTAAAATTCGACTCCGAAGAAGATGTATCA
+AAGGCACTTGCTAAATACACTGCTGAGCTATCGGAAAAATTCATCAAACAAAAAGGTTCT
+TTCACTGTTGTGCTCTCTGGTGGTTCTCTTATCGATACCATGAGGAAATTGGTAGAGCCG
+CCGTACAAAGACTCAATTGATTGGTCGAAATGGTGGATTTTTTGGGTAGACGAAAGAGTG
+GTTCCTCTAGGTCACGATGATAGCAATTATAAACTTGCTTCGGATGGGTTTCTTTCTAAG
+GTTCCGATCCCCTCTTCTAACATTTATGCGATTAATGACAAGGAGTCACCTGAGGGTGCA
+GCTGCTGATTACGAAGCTCGTCTGAAACAATTGATTGAGAGCAAAGTTCTTCCGTTATCA
+GCAATTACTGGATTCCCCAAATTTGATCTTATGCTATTAGGTATGGGGCCAGATGGACAT
+GTAGCGTCTTTGTTTCCTTTGCATCCTCACCGCCACGAGAAGGAGCGGCTGGTCACCTTC
+ATTACAGACTCACCAAAACCTCCTCCACCAAGGATTACTTTCACCTTTCCGGTAATTAAT
+TCGGCTTCAGAGATAGCAATGGTGGTCACAGGAGCAGAGTTAGCTCATATGGTTGATGTC
+GCTTTGGGTAATGCGCCT------CCTCCTGATGGAATTCCTCCCCCTTGTACTGAGGTT
+TCAGCTGAAGAGGAACTGACCTGGTTTTTAGACAAGGATGCTGCATCAGAACTA------
+------CAGACCTCTAGA
+>contig_2
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------NNCCTTTCCAAG
+GTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCA
+GCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCA
+ACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCAT
+GTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTC
+ATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAAC
+TCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATA
+GCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTT
+TCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAAT
+TATGTGCGCTTTGATGAT
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/39614.faa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.faa Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,19 @@
+>gnl_Orysa6.0_PACid_16878968
+MSHMEAFQNVVLLHKANSNSTLEDISSLSAASCCSLDQLLACVEGEAQKIFGDIQNLLADHRSEVAHFTQELRESFRISL
+DRTKDMSSFILGLFDKYVEETSKLQSHSNHTHEAQVKSLEDFQKAYEEQSKSEEQKLLADITSLVSKHVTRQRELVGGRL
+NSLGDAARGNKAFLDEHTSAMEVVTKDAKRKWEMFAEQAENDCKVGSNFSAAKHCRMETILQECACTVDTAAQQWKASHA
+TVNDLCRKQIAEVEALVRSAIETNEQHEAEIASSRATAEEHASNSSKDLLQDVDNMLQEARNSSSRVVSTVEAHLGESQH
+LQESHSSHTAGINTHADNAFQSSYKDYEPTGETPVRSEPEVPSKDAIESLRAMPMESLMDEFRENHPYEPSKDRRPSLIP
+RSPLATINN
+>gnl_Phoda3.0_PDK_30s1023721g001
+VNQKMMKCTLIKDLYGEIERLKAEVYAAREKVGVYIPKERYHQEESERKAMAEQIEQMGVLLENNQKQIEDLQERYNTQL
+QQSDDLSKKLDATEILCVSLSKKLDATEKSLEHTSKLLAAAREDLKQAQYTLKEKDFVISEQRKAAREDKLNTANRSIVN
+NFRADLATRVGTLCNTVVASLDRQNEHLQSVEKLCQSSLDFHDKAVSELKRKVSASRALYTSHMEALQNVVRLHKASSNA
+SLEEMSSMISANTCSLDQLLALGQSEADLIFSDLQSILSIHRGEIANFTRELREKFQVNLDRTKEMSNFILELLEKIGKG
+TKEFQNDSTLVHEAQVKSIGDFQKAYEVEVRLTGLGDAARDSKAIMDNHASSMDIVTTDAKRKWEEYSKQAEQDSEDGSN
+FSAAKHCRMELMLQQCVNSVDATSQQWKKTHASVSEMSSKHVAEIEALVRSAIESNDQHDAEVASARMAAEEDVAKNSKD
+VLQHFDTVIDHERNSAAGVMAAVEAHSATLHKLQEEQSSQATEINSHAEDTFQNTYMDYEPTGETPTRSEPDIPSRGTIE
+SLRAMPIEALLEEFRENHPYESKEPKPSLIPRSPLVQLN
+>contig_3
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG
+TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/39614.faa.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.faa.aln Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,36 @@
+>gnl_Orysa6.0_PACid_16878968
+M-----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------------------------------SHMEAFQNVVLLHKANSNS
+TLEDISSLSAASCCSLDQLLACVEGEAQKIFGDIQNLLADHRSEVAHFTQELRESFRISL
+DRTKDMSSFILGLFDKYVEETSKLQSHSNHTHEAQVKSLEDFQKAYEEQSKSEEQKLLAD
+ITSLVSKHVTRQRELVGGRLNSLGDAARGNKAFLDEHTSAMEVVTKDAKRKWEMFAEQAE
+NDCKVGSNFSAAKHCRMETILQECACTVDTAAQQWKASHATVNDLCRKQIAEVEALVRSA
+IETNEQHEAEIASSRATAEEHASNSSKDLLQDVDNMLQEARNSSSRVVSTVEAHLGESQH
+LQESHSSHTAGINTHADNAFQSSYKDYEPTGETPVRSEPEVPSKDAIESLRAMPMESLMD
+EFRENHPYEPS---KDRRPSLIPRSPLATINN
+>gnl_Phoda3.0_PDK_30s1023721g001
+VNQKMMKCTLIKDLYGEIERLKAEVYAAREKVGVYIPKERYHQEESERKAMAEQIEQMGV
+LLENNQKQIEDLQERYNTQLQQSDDLSKKLDATEILCVSLSKKLDATEKSLEHTSKLLAA
+AREDLKQAQYTLKEKDFVISEQRKAAREDKLNTANRSIVNNFRADLATRVGTLCNTVVAS
+LDRQNEHLQSVEKLCQSSLDFHDKAVSELKRKVSASRALYTSHMEALQNVVRLHKASSNA
+SLEEMSSMISANTCSLDQLLALGQSEADLIFSDLQSILSIHRGEIANFTRELREKFQVNL
+DRTKEMSNFILELLEKIGKGTKEFQNDSTLVHEAQVKSIGDFQKAYE-------------
+---------------VEVRLTGLGDAARDSKAIMDNHASSMDIVTTDAKRKWEEYSKQAE
+QDSEDGSNFSAAKHCRMELMLQQCVNSVDATSQQWKKTHASVSEMSSKHVAEIEALVRSA
+IESNDQHDAEVASARMAAEEDVAKNSKDVLQHFDTVIDHERNSAAGVMAAVEAHSATLHK
+LQEEQSSQATEINSHAEDTFQNTYMDYEPTGETPTRSEPDIPSRGTIESLRAMPIEALLE
+EFRENHPYES----KEPKPSLIPRSPLVQLN-
+>contig_3
+X-----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------VD------------------------EGVV---
+--------------------------------VAGLSEQEKASVSEILTTARAHSETIEN
+LKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEE
+EFRENHSYESAVTGKELMPSVTTRAPFSQIN-
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/39614.fna
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.fna Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,47 @@
+>gnl_Orysa6.0_PACid_16878968
+ATGTCACATATGGAAGCCTTCCAAAATGTTGTGCTCCTGCATAAAGCAAATTCAAATTCTACACTAGAGGATATATCATC
+CCTATCTGCTGCAAGCTGTTGCAGCCTTGATCAGCTTCTAGCTTGTGTCGAGGGAGAGGCACAGAAGATATTTGGTGATA
+TCCAGAATTTGCTAGCTGATCATCGAAGCGAAGTGGCACATTTCACTCAAGAGTTGCGGGAGAGTTTCCGCATTAGCTTG
+GATAGGACGAAGGACATGTCTAGTTTCATCCTTGGGTTGTTCGATAAGTATGTGGAGGAAACTTCGAAGTTGCAGAGCCA
+CTCCAATCACACACATGAAGCACAAGTCAAAAGCCTTGAAGATTTCCAGAAGGCTTATGAGGAGCAATCAAAATCAGAAG
+AACAAAAGCTTCTGGCGGACATCACCAGTTTGGTTTCTAAACACGTTACTCGACAACGAGAACTGGTGGGTGGTAGACTA
+AACTCTCTTGGTGACGCCGCTCGTGGAAACAAAGCATTTTTGGATGAGCACACGTCCGCCATGGAGGTGGTCACGAAGGA
+CGCCAAGAGAAAGTGGGAAATGTTTGCAGAGCAGGCAGAGAATGACTGCAAAGTTGGGTCCAACTTCTCTGCAGCTAAGC
+ATTGTCGCATGGAAACCATTCTGCAGGAATGTGCATGCACCGTCGACACTGCTGCTCAACAATGGAAAGCATCACATGCA
+ACTGTTAACGATCTATGCAGAAAACAAATAGCTGAAGTTGAAGCACTCGTCAGGAGTGCAATCGAAACCAACGAGCAGCA
+CGAAGCAGAGATTGCATCTTCCCGTGCCACGGCCGAGGAGCATGCGTCCAACAGCAGCAAGGACCTACTCCAAGATGTTG
+ACAATATGCTGCAGGAGGCGCGCAATTCGTCGTCGAGAGTGGTGTCGACGGTGGAAGCTCATTTGGGAGAGAGCCAGCAT
+CTACAGGAGAGCCACTCCAGCCATACCGCCGGCATCAACACCCACGCCGACAACGCTTTCCAGAGCAGCTACAAGGACTA
+CGAGCCGACCGGCGAAACTCCGGTGAGGTCGGAGCCGGAGGTGCCGAGCAAAGACGCGATCGAGTCGCTGCGAGCGATGC
+CGATGGAGTCCCTGATGGACGAGTTCCGCGAGAACCACCCCTACGAGCCGAGCAAGGACCGCAGGCCATCGCTCATCCCT
+CGCTCGCCGCTCGCCACCATCAACAAC
+>gnl_Phoda3.0_PDK_30s1023721g001
+GTAAACCAAAAAATGATGAAATGTACATTAATCAAAGATCTCTATGGAGAAATTGAGCGTCTAAAAGCAGAGGTGTATGC
+TGCTCGTGAGAAAGTTGGAGTTTACATACCAAAAGAACGCTACCATCAAGAAGAGAGCGAACGGAAGGCAATGGCAGAAC
+AAATTGAACAAATGGGGGTCTTGCTCGAAAACAATCAAAAGCAAATTGAGGATCTACAAGAAAGGTATAATACTCAACTT
+CAACAGTCTGATGACCTGAGCAAAAAGCTTGATGCCACCGAGATTCTCTGTGTTTCTCTGAGCAAAAAGCTTGATGCCAC
+CGAGAAAAGTTTGGAGCACACTAGCAAGTTATTGGCTGCTGCCAGAGAAGATCTGAAGCAAGCTCAGTATACTCTGAAGG
+AGAAAGATTTTGTTATATCAGAGCAGAGGAAAGCAGCTAGAGAAGACAAACTGAATACTGCCAACAGATCTATTGTGAAC
+AATTTTCGGGCTGATCTTGCAACAAGGGTTGGAACACTTTGTAATACTGTTGTTGCATCCTTGGATCGGCAAAATGAACA
+CCTTCAGTCTGTTGAGAAACTATGTCAATCTAGCCTTGATTTCCATGACAAGGCAGTATCAGAGCTGAAAAGGAAAGTGT
+CAGCTTCAAGAGCTTTGTATACTTCCCATATGGAAGCACTACAAAATGTAGTGCGTTTGCATAAGGCAAGCAGCAATGCC
+AGCTTAGAAGAGATGTCATCCATGATTTCTGCCAATACCTGCTCTCTTGATCAGTTACTTGCCTTGGGGCAAAGCGAAGC
+AGATCTGATTTTTAGTGATCTGCAAAGCATATTGTCAATTCACCGAGGAGAGATTGCAAATTTCACCCGTGAACTTCGTG
+AGAAATTTCAAGTTAATTTGGATCGGACAAAGGAGATGTCCAATTTTATTCTTGAGCTGCTTGAAAAGATAGGGAAGGGA
+ACAAAAGAATTTCAGAATGACTCAACTTTGGTACATGAGGCTCAGGTGAAGAGCATTGGTGATTTCCAAAAGGCATATGA
+GGTGGAAGTGAGGCTCACTGGACTGGGAGATGCTGCTCGAGATAGCAAAGCAATTATGGATAACCATGCATCATCAATGG
+ACATCGTCACAACTGATGCTAAGAGGAAGTGGGAAGAATATTCCAAGCAGGCAGAGCAAGATTCAGAGGACGGTTCTAAC
+TTTTCAGCAGCAAAACATTGTCGCATGGAACTCATGCTCCAACAGTGTGTAAACTCTGTTGATGCTACTTCTCAACAGTG
+GAAGAAGACACATGCATCTGTTAGTGAGATGAGCAGCAAACACGTTGCTGAAATTGAAGCACTTGTAAGGAGTGCCATTG
+AGAGCAATGATCAGCATGATGCTGAGGTTGCTTCAGCAAGAATGGCAGCAGAAGAGGATGTAGCGAAAAATAGCAAAGAT
+GTTCTTCAGCATTTTGATACTGTGATTGATCATGAGCGCAACTCAGCCGCTGGAGTGATGGCAGCAGTCGAAGCTCACTC
+AGCAACCCTGCATAAACTGCAAGAGGAACAATCAAGCCAGGCAACAGAGATTAATAGCCATGCGGAGGACACATTCCAAA
+ACACCTACATGGACTATGAACCAACGGGAGAAACCCCAACAAGGTCGGAACCAGATATACCAAGCAGGGGAACAATCGAA
+TCTCTTCGAGCCATGCCGATAGAAGCCCTCCTTGAAGAGTTCCGGGAGAACCATCCATACGAGTCCAAGGAGCCCAAACC
+GTCTCTCATACCACGCTCTCCACTCGTCCAGCTCAAC
+>contig_3
+NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG
+AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA
+CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT
+TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
b
diff -r a73c2e65098e -r 3384b6a842b0 test-data/39614.fna.aln
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.fna.aln Mon Oct 30 09:52:00 2017 -0400
b
@@ -0,0 +1,99 @@
+>gnl_Orysa6.0_PACid_16878968
+ATG---------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---TCACATATGGAAGCCTTCCAAAATGTTGTGCTCCTGCATAAAGCAAATTCAAATTCT
+ACACTAGAGGATATATCATCCCTATCTGCTGCAAGCTGTTGCAGCCTTGATCAGCTTCTA
+GCTTGTGTCGAGGGAGAGGCACAGAAGATATTTGGTGATATCCAGAATTTGCTAGCTGAT
+CATCGAAGCGAAGTGGCACATTTCACTCAAGAGTTGCGGGAGAGTTTCCGCATTAGCTTG
+GATAGGACGAAGGACATGTCTAGTTTCATCCTTGGGTTGTTCGATAAGTATGTGGAGGAA
+ACTTCGAAGTTGCAGAGCCACTCCAATCACACACATGAAGCACAAGTCAAAAGCCTTGAA
+GATTTCCAGAAGGCTTATGAGGAGCAATCAAAATCAGAAGAACAAAAGCTTCTGGCGGAC
+ATCACCAGTTTGGTTTCTAAACACGTTACTCGACAACGAGAACTGGTGGGTGGTAGACTA
+AACTCTCTTGGTGACGCCGCTCGTGGAAACAAAGCATTTTTGGATGAGCACACGTCCGCC
+ATGGAGGTGGTCACGAAGGACGCCAAGAGAAAGTGGGAAATGTTTGCAGAGCAGGCAGAG
+AATGACTGCAAAGTTGGGTCCAACTTCTCTGCAGCTAAGCATTGTCGCATGGAAACCATT
+CTGCAGGAATGTGCATGCACCGTCGACACTGCTGCTCAACAATGGAAAGCATCACATGCA
+ACTGTTAACGATCTATGCAGAAAACAAATAGCTGAAGTTGAAGCACTCGTCAGGAGTGCA
+ATCGAAACCAACGAGCAGCACGAAGCAGAGATTGCATCTTCCCGTGCCACGGCCGAGGAG
+CATGCGTCCAACAGCAGCAAGGACCTACTCCAAGATGTTGACAATATGCTGCAGGAGGCG
+CGCAATTCGTCGTCGAGAGTGGTGTCGACGGTGGAAGCTCATTTGGGAGAGAGCCAGCAT
+CTACAGGAGAGCCACTCCAGCCATACCGCCGGCATCAACACCCACGCCGACAACGCTTTC
+CAGAGCAGCTACAAGGACTACGAGCCGACCGGCGAAACTCCGGTGAGGTCGGAGCCGGAG
+GTGCCGAGCAAAGACGCGATCGAGTCGCTGCGAGCGATGCCGATGGAGTCCCTGATGGAC
+GAGTTCCGCGAGAACCACCCCTACGAGCCGAGC---------AAGGACCGCAGGCCATCG
+CTCATCCCTCGCTCGCCGCTCGCCACCATCAACAAC
+>gnl_Phoda3.0_PDK_30s1023721g001
+GTAAACCAAAAAATGATGAAATGTACATTAATCAAAGATCTCTATGGAGAAATTGAGCGT
+CTAAAAGCAGAGGTGTATGCTGCTCGTGAGAAAGTTGGAGTTTACATACCAAAAGAACGC
+TACCATCAAGAAGAGAGCGAACGGAAGGCAATGGCAGAACAAATTGAACAAATGGGGGTC
+TTGCTCGAAAACAATCAAAAGCAAATTGAGGATCTACAAGAAAGGTATAATACTCAACTT
+CAACAGTCTGATGACCTGAGCAAAAAGCTTGATGCCACCGAGATTCTCTGTGTTTCTCTG
+AGCAAAAAGCTTGATGCCACCGAGAAAAGTTTGGAGCACACTAGCAAGTTATTGGCTGCT
+GCCAGAGAAGATCTGAAGCAAGCTCAGTATACTCTGAAGGAGAAAGATTTTGTTATATCA
+GAGCAGAGGAAAGCAGCTAGAGAAGACAAACTGAATACTGCCAACAGATCTATTGTGAAC
+AATTTTCGGGCTGATCTTGCAACAAGGGTTGGAACACTTTGTAATACTGTTGTTGCATCC
+TTGGATCGGCAAAATGAACACCTTCAGTCTGTTGAGAAACTATGTCAATCTAGCCTTGAT
+TTCCATGACAAGGCAGTATCAGAGCTGAAAAGGAAAGTGTCAGCTTCAAGAGCTTTGTAT
+ACTTCCCATATGGAAGCACTACAAAATGTAGTGCGTTTGCATAAGGCAAGCAGCAATGCC
+AGCTTAGAAGAGATGTCATCCATGATTTCTGCCAATACCTGCTCTCTTGATCAGTTACTT
+GCCTTGGGGCAAAGCGAAGCAGATCTGATTTTTAGTGATCTGCAAAGCATATTGTCAATT
+CACCGAGGAGAGATTGCAAATTTCACCCGTGAACTTCGTGAGAAATTTCAAGTTAATTTG
+GATCGGACAAAGGAGATGTCCAATTTTATTCTTGAGCTGCTTGAAAAGATAGGGAAGGGA
+ACAAAAGAATTTCAGAATGACTCAACTTTGGTACATGAGGCTCAGGTGAAGAGCATTGGT
+GATTTCCAAAAGGCATATGAG---------------------------------------
+---------------------------------------------GTGGAAGTGAGGCTC
+ACTGGACTGGGAGATGCTGCTCGAGATAGCAAAGCAATTATGGATAACCATGCATCATCA
+ATGGACATCGTCACAACTGATGCTAAGAGGAAGTGGGAAGAATATTCCAAGCAGGCAGAG
+CAAGATTCAGAGGACGGTTCTAACTTTTCAGCAGCAAAACATTGTCGCATGGAACTCATG
+CTCCAACAGTGTGTAAACTCTGTTGATGCTACTTCTCAACAGTGGAAGAAGACACATGCA
+TCTGTTAGTGAGATGAGCAGCAAACACGTTGCTGAAATTGAAGCACTTGTAAGGAGTGCC
+ATTGAGAGCAATGATCAGCATGATGCTGAGGTTGCTTCAGCAAGAATGGCAGCAGAAGAG
+GATGTAGCGAAAAATAGCAAAGATGTTCTTCAGCATTTTGATACTGTGATTGATCATGAG
+CGCAACTCAGCCGCTGGAGTGATGGCAGCAGTCGAAGCTCACTCAGCAACCCTGCATAAA
+CTGCAAGAGGAACAATCAAGCCAGGCAACAGAGATTAATAGCCATGCGGAGGACACATTC
+CAAAACACCTACATGGACTATGAACCAACGGGAGAAACCCCAACAAGGTCGGAACCAGAT
+ATACCAAGCAGGGGAACAATCGAATCTCTTCGAGCCATGCCGATAGAAGCCCTCCTTGAA
+GAGTTCCGGGAGAACCATCCATACGAGTCC------------AAGGAGCCCAAACCGTCT
+CTCATACCACGCTCTCCACTCGTCCAGCTCAAC---
+>contig_3
+NNT---------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------GTGGAT---------------------------------
+---------------------------------------GAAGGAGTTGTT---------
+------------------------------------------------------------
+------------------------------------GTTGCTGGCTTGTCAGAGCAGGAG
+AAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAAC
+CTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTC
+AGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGAT
+ATTCCCAGCAAAGGCACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAA
+GAATTTCGAGAAAACCATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCT
+GTTACGACTCGTGCACCATTTTCACAGATCAAC---
b
diff -r a73c2e65098e -r 3384b6a842b0 utils.py
--- a/utils.py Fri Aug 25 13:01:25 2017 -0400
+++ b/utils.py Mon Oct 30 09:52:00 2017 -0400
[
@@ -27,7 +27,7 @@
     return fstderr, fherr, fstdout, fhout
 
 
-def move_directory_files(source_dir, destination_dir, copy=False):
+def move_directory_files(source_dir, destination_dir, copy=False, remove_source_dir=False):
     source_directory = os.path.abspath(source_dir)
     destination_directory = os.path.abspath(destination_dir)
     if not os.path.isdir(destination_directory):
@@ -38,6 +38,8 @@
             shutil.copy(source_entry, destination_directory)
         else:
             shutil.move(source_entry, destination_directory)
+    if remove_source_dir:
+        os.rmdir(source_directory)
 
 
 def run_command(cmd):
@@ -52,29 +54,3 @@
 
 def stop_err(msg):
     sys.exit(msg)
-
-
-def write_html_output(output, title, dir):
-    with open(output, 'w') as fh:
-        dir_items = sorted(os.listdir(dir))
-        # Directories can only contain either files or directories,
-        # but not both.
-        if len(dir_items) > 0:
-            item_path = os.path.join(dir, dir_items[0])
-            if os.path.isdir(item_path):
-                header = 'Directories'
-            else:
-                header = 'Datasets'
-        else:
-            header = ''
-        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
-        fh.write('<body><p/><table cellpadding="2">\n')
-        fh.write('<tr><b>%s</th></b>\n' % header)
-        for index, fname in enumerate(dir_items):
-            if index % 2 == 0:
-                bgcolor = '#D8D8D8'
-            else:
-                bgcolor = '#FFFFFF'
-            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
-            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
-        fh.write('</table></body></html>\n')