Repository 'plant_tribes_assembly_post_processor'
hg clone https://toolshed.g2.bx.psu.edu/repos/greg/plant_tribes_assembly_post_processor

Changeset 0:fcc558568020 (2017-06-08)
Next changeset 1:cc94680dbc43 (2017-06-20)
Commit message:
Uploaded
added:
.shed.yml
assembly_post_processor.py
assembly_post_processor.xml
macros.xml
plant_tribes_scaffolds.loc
plant_tribes_scaffolds.loc.sample
test-data/arabidopsis_thaliana.smat
test-data/assembly.fasta
test-data/assembly_tgf.fasta
test-data/output.pttgf
test-data/target_orthos.ids
test-data/transcripts.cds
test-data/transcripts.cleaned.cds
test-data/transcripts.cleaned.nr.cds
test-data/transcripts.cleaned.nr.pep
test-data/transcripts.cleaned.pep
test-data/transcripts.cleaned_tgf.cds
test-data/transcripts.cleaned_tgf.pep
test-data/transcripts.pep
test-data/transcripts2.cds
test-data/transcripts2.pep
test-data/transcripts_tgf.cds
test-data/transcripts_tgf.cleaned.nr.cds
test-data/transcripts_tgf.cleaned.nr.pep
test-data/transcripts_tgf.pep
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
utils.py
b
diff -r 000000000000 -r fcc558568020 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,15 @@
+name: plant_tribes_assembly_post_processor
+owner: greg
+description: |
+  Contains a tool that postprocesses de novo assembly transcripts into putative coding sequences and their
+  corresponding amino acid translations, locally assembling targeted gene families.
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that
+  utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
+  comparative evolutionary studies. It postprocesses de novo assembly transcripts into putative coding
+  sequences and their corresponding amino acid translations, locally assembling targeted gene families.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/assembly_post_processor
+type: unrestricted
+categories:
+- Phylogenetics
b
diff -r 000000000000 -r fcc558568020 assembly_post_processor.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/assembly_post_processor.py Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""Galaxy wrapper script for the PlantTribes AssemblyPostProcessor pipeline.
+
+Translates the options below into an AssemblyPostProcessor command line, runs
+it through utils.run_command, and then moves the result files expected under
+OUTPUT_DIR to the output paths supplied by the caller.
+"""
+import argparse
+import os
+import shutil
+
+import utils
+
+# Directory, relative to the working directory, that the result files are
+# collected from once the command has run.
+OUTPUT_DIR = 'assemblyPostProcessing_dir'
+
+# Options that are used unconditionally below are required, so a missing one is
+# reported by argparse before the pipeline starts instead of surfacing later as
+# a TypeError or as a literal "None" on the generated command line.
+parser = argparse.ArgumentParser()
+parser.add_argument('--dereplicate', dest='dereplicate', default=None, help='Remove duplicate sequences')
+parser.add_argument('--gap_trimming', dest='gap_trimming', type=float, default=0, help='Trim alignments')
+parser.add_argument('--gene_family_search', dest='gene_family_search', default=None, help='Targeted gene families')
+parser.add_argument('--method', dest='method', default=None, help='Protein clustering method')
+parser.add_argument('--min_length', dest='min_length', type=int, default=0, help='Minimum sequence length')
+parser.add_argument('--num_threads', dest='num_threads', type=int, required=True, help='Number of processors')
+parser.add_argument('--output_pttgf', dest='output_pttgf', default=None, help='Primary targeted gene families dataset')
+parser.add_argument('--output_cds', dest='output_cds', required=True, help='Output transcripts.cds')
+parser.add_argument('--output_cleaned_cds', dest='output_cleaned_cds', required=True, help='Output transcripts.cleaned.cds')
+parser.add_argument('--output_cleaned_nr_cds', dest='output_cleaned_nr_cds', default=None, help='Output transcripts.cleaned.nr.cds')
+parser.add_argument('--output_cleaned_nr_pep', dest='output_cleaned_nr_pep', default=None, help='Output transcripts.cleaned.nr.pep')
+parser.add_argument('--output_cleaned_pep', dest='output_cleaned_pep', required=True, help='Output transcripts.cleaned.pep')
+parser.add_argument('--output_pep', dest='output_pep', required=True, help='Output transcripts.pep')
+parser.add_argument('--output_pttgf_dir', dest='output_pttgf_dir', default=None, help='Directory hierarchy of targeted gene family datasets')
+parser.add_argument('--prediction_method', dest='prediction_method', required=True, help='Coding regions prediction method')
+parser.add_argument('--scaffold', dest='scaffold', default=None, help='Gene family scaffold')
+parser.add_argument('--score_matrices', dest='score_matrices', default=None, help='Scores matrices')
+parser.add_argument('--strand_specific', dest='strand_specific', default=None, help='Strand-specific assembly')
+parser.add_argument('--transcripts', dest='transcripts', required=True, help='Transcriptome assembly fasta file')
+
+args = parser.parse_args()
+
+# Build the command line as fragments that are joined with single spaces.
+# NOTE(review): values are interpolated without shell quoting. If
+# utils.run_command passes this string to a shell, paths containing spaces or
+# shell metacharacters will be split or interpreted - confirm, and quote if so.
+cmd = ['AssemblyPostProcessor']
+if args.dereplicate is not None:
+    cmd.append('--dereplicate')
+if args.gap_trimming > 0:
+    # NOTE(review): '%4f' is a minimum width of 4 with six decimal places;
+    # '%.4f' may have been intended - confirm before changing the output.
+    cmd.append('--gap_trimming %4f' % args.gap_trimming)
+if args.gene_family_search is not None:
+    cmd.append('--gene_family_search %s' % args.gene_family_search)
+if args.method is not None:
+    cmd.append('--method %s' % args.method)
+if args.min_length > 0:
+    cmd.append('--min_length %d' % args.min_length)
+cmd.append('--num_threads %d' % args.num_threads)
+cmd.append('--prediction_method %s' % args.prediction_method)
+if args.scaffold is not None:
+    cmd.append('--scaffold %s' % args.scaffold)
+if args.score_matrices is not None:
+    cmd.append('--score_matrices %s' % args.score_matrices)
+if args.strand_specific is not None:
+    cmd.append('--strand_specific')
+cmd.append('--transcripts %s' % args.transcripts)
+# Run the command.
+utils.run_command(' '.join(cmd))
+
+# Handle outputs: (file name under OUTPUT_DIR, destination path) pairs, in the
+# order they are moved.  The '.nr.' files are optional and are only moved when a
+# destination was given.
+outputs = [
+    ('transcripts.cds', args.output_cds),
+    ('transcripts.cleaned.cds', args.output_cleaned_cds),
+    ('transcripts.cleaned.nr.cds', args.output_cleaned_nr_cds),
+    ('transcripts.cleaned.nr.pep', args.output_cleaned_nr_pep),
+    ('transcripts.cleaned.pep', args.output_cleaned_pep),
+    ('transcripts.pep', args.output_pep),
+]
+for file_name, destination in outputs:
+    if destination is not None:
+        shutil.move(os.path.join(OUTPUT_DIR, file_name), destination)
+# The targeted gene families directory is only handled when both the primary
+# dataset path and its directory were supplied.
+if args.output_pttgf is not None and args.output_pttgf_dir is not None:
+    src_output_dir = os.path.join(OUTPUT_DIR, 'targeted_gene_families')
+    utils.move_directory_files(src_output_dir, args.output_pttgf_dir)
+    utils.write_html_output(args.output_pttgf, 'Targeted gene families', args.output_pttgf_dir)
b
diff -r 000000000000 -r fcc558568020 assembly_post_processor.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/assembly_post_processor.xml Thu Jun 08 12:44:09 2017 -0400
[
b'@@ -0,0 +1,288 @@\n+<tool id="plant_tribes_assembly_post_processor" name="AssemblyPostProcessor" version="@WRAPPER_VERSION@.0">\n+    <description>post-processes de novo transcriptome assembly</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <expand macro="requirements_assembly_post_processor" />\n+    <command detect_errors="exit_code"><![CDATA[\n+python $__tool_directory__/assembly_post_processor.py\n+--transcripts \'$input\'\n+--prediction_method $prediction_method_cond.prediction_method\n+#if str($prediction_method_cond.prediction_method) == \'estscan\':\n+    --score_matrices \'$score_matrices\'\n+#end if\n+#if str($options_type.options_type_selector) == \'advanced\':\n+    #set target_gene_family_assembly_cond = $options_type.target_gene_family_assembly_cond\n+    #if str($target_gene_family_assembly_cond.target_gene_family_assembly) == \'yes\':\n+        --gene_family_search \'$target_gene_family_assembly_cond.orthogroups\'\n+        --output_pttgf $output_pttgf\n+        --output_pttgf_dir $output_pttgf.files_path\n+        --scaffold \'$target_gene_family_assembly_cond.scaffold.fields.path\'\n+        --method \'$target_gene_family_assembly_cond.method\'\n+        --gap_trimming $target_gene_family_assembly_cond.gap_trimming\n+    #end if\n+    #if str($options_type.strand_specific) == \'yes\':\n+        --strand_specific \'true\'\n+    #end if\n+    #if str($options_type.dereplicate) == \'yes\':\n+        --dereplicate \'true\'\n+        --output_cleaned_nr_cds \'$output_cleaned_nr_cds\'\n+        --output_cleaned_nr_pep \'$output_cleaned_nr_pep\'\n+    #end if\n+    --min_length $options_type.min_length\n+#end if\n+--num_threads \\${GALAXY_SLOTS:-4}\n+--output_cds \'$output_cds\'\n+--output_cleaned_cds \'$output_cleaned_cds\'\n+--output_cleaned_pep \'$output_cleaned_pep\'\n+--output_pep \'$output_pep\'\n+    ]]></command>\n+    <inputs>\n+        <param name="input" format="fasta" type="data" label="Transcriptome 
assembly fasta file"/>\n+        <conditional name="prediction_method_cond">\n+            <param name="prediction_method" type="select" label="Coding regions prediction method">\n+                <option value="transdecoder" selected="true">TransDecoder</option>\n+                <option value="estscan">ESTScan</option>\n+            </param>\n+            <when value="transdecoder" />\n+            <when value="estscan">\n+                <param name="score_matrices" format="smat" type="data" label="Scores matrices"/>\n+            </when>\n+        </conditional>\n+        <conditional name="options_type">\n+            <param name="options_type_selector" type="select" label="Options configuration">\n+                <option value="basic" selected="true">Basic</option>\n+                <option value="advanced">Advanced</option>\n+            </param>\n+            <when value="basic" />\n+            <when value="advanced">\n+                <conditional name="target_gene_family_assembly_cond">\n+                    <param name="target_gene_family_assembly" type="select" label="Perform targeted gene assembly?">\n+                        <option value="no" selected="true">No</option>\n+                        <option value="yes">Yes</option>\n+                    </param>\n+                    <when value="no" />\n+                    <when value="yes">\n+                        <param name="orthogroups" format="tabular" type="data" label="Targeted gene families"/>\n+                        <param name="scaffold" type="select" label="Gene family scaffold">\n+                            <options from_data_table="plant_tribes_scaffolds" />\n+                            <validator type="no_options" message="No PlantTribes scaffolds are available.  
Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>\n+                        </param>\n+                        <param name="method" type="select" label="Protein clustering method">\n+                            <option value="gfam" selected="true">GF'..b'eference generation and analysis},\n+            year = {2013},\n+            volume = {8},\n+            number = {8},\n+            pages = {1494-1512},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Iseli1999,\n+            journal = {ISMB},\n+            author = {4. Iseli C, Jongeneel CV, Bucher P},\n+            title = {ESTScan: a program for detecting, evaluating, and reconstructing potential coding regions in EST sequences},\n+            year = {1999},\n+            volume = {99},\n+            pages = {138-148},\n+            url = {http://estscan.sourceforge.net},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Huang1999,\n+            journal = {Genome Research},\n+            author = {5. Huang X, Madan A},\n+            title = {CAP3: A DNA sequence assembly program},\n+            year = {1999},\n+            volume = {9},\n+            number = {9},\n+            pages = {868-877},\n+            url = {http://seq.cs.iastate.edu/cap3.html},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Eddy2009,\n+            journal = {Genome Inform},\n+            author = {6. Eddy SR},\n+            title = {A new generation of homology search tools based on probabilistic inference},\n+            year = {2009},\n+            volume = {23},\n+            number = {1},\n+            pages = {205-211},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Katoh2013,\n+            journal = {Molecular biology and evolution},\n+            author = {7. 
Katoh K, Standley DM},\n+            title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},\n+            year = {2013},\n+            volume = {30},\n+            number = {4},\n+            pages = {772-780},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Sasidharan2012,\n+            journal = {Nucleic Acids Research},\n+            author = {8. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},\n+            title = {GFam: a platform for automatic annotation of gene families},\n+            year = {2012},\n+            pages = {gks631},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Li2003,\n+            journal = {Genome Research},\n+            author = {9. Li L, Stoeckert CJ, Roos DS},\n+            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},\n+            year = {2003},\n+            volume = {13},\n+            number = {9},\n+            pages = {2178-2189},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Emms2015,\n+            journal = {Genome Biology},\n+            author = {10. Emms DM, Kelly S},\n+            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},\n+            year = {2015},\n+            volume = {16},\n+            number = {1},\n+            pages = {157},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Capella-Gutierrez2009,\n+            journal = {Bioinformatics},\n+            author = {11. 
Capella-Gutierrez S, Silla-Mart\xc3\xadnez JM, Gabald\xc3\xb3n T},\n+            title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},\n+            year = {2009},\n+            volume = {25},\n+            number = {15},\n+            pages = {1972-1973},}\n+        </citation>\n+        <citation type="bibtex">\n+            @article{Gremme2013,\n+            journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},\n+            author = {12. Gremme G, Steinbiss S, Kurtz S},\n+            title = {GenomeTools: a comprehensive software library for efficient processing of structured genome annotations},\n+            year = {2013},\n+            volume = {10},\n+            number = {3},\n+            pages = {645-656},}\n+        </citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r fcc558568020 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,130 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="requirements_assembly_post_processor">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_assembly_post_processor</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_aligner">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_aligner</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_classifier">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_classifier</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_integrator">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_integrator</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_kaks_analysis">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_kaks_analysis</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_ks_distribution">
+        <requirements>
+            <requirement type="package" version="1.3.0">r-optparse</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_phylogeny_builder">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_phylogeny_builder</requirement>
+        </requirements>
+    </xml>
+    <xml name="param_codon_alignments">
+        <param name="codon_alignments" type="select" label="Codon alignments">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <xml name="param_options_type">
+        <param name="options_type" type="select" label="Options Configuration">
+            <option value="basic" selected="true">Basic</option>
+            <option value="advanced">Advanced</option>
+        </param>
+    </xml>
+    <xml name="param_orthogroup_fna">
+        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Gene family scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <xml name="param_sequence_type">
+        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
+            <option value="protein" selected="true">Amino acid based</option>
+            <option value="dna">Nucleotide based</option>
+        </param>
+    </xml>
+    <xml name="cond_alignment_method">
+        <conditional name="alignment_method_cond">
+            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
+                <option value="mafft" selected="true">MAFFT</option>
+                <option value="pasta">PASTA</option>
+            </param>
+            <when value="mafft" />
+            <when value="pasta">
+                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="cond_remove_gappy_sequences">
+        <conditional name="remove_gappy_sequences_cond">
+            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <conditional name="trim_type_cond">
+                    <param name="trim_type" type="select" label="Trimming method">
+                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
+                        <option value="automated_trimming">Automated heuristic trimming</option>
+                    </param>
+                    <when value="gap_trimming">
+                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
+                    </when>
+                    <when value="automated_trimming" />
+                </conditional>
+                <conditional name="remove_sequences_with_gaps_cond">
+                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
+                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+</macros>
b
diff -r 000000000000 -r fcc558568020 plant_tribes_scaffolds.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,3 @@
+## Plant Tribes scaffolds
+#Value Name Path Description
+22Gv1.1 22Gv1.1 ${__HERE__}/test-data/tool-data/plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
b
diff -r 000000000000 -r fcc558568020 plant_tribes_scaffolds.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value Name Path Description
+#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
b
diff -r 000000000000 -r fcc558568020 test-data/arabidopsis_thaliana.smat
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/arabidopsis_thaliana.smat Thu Jun 08 12:44:09 2017 -0400
b
b'@@ -0,0 +1,8248 @@\n+FORMAT: at.conf CODING REGION 6 3 1 s C+G: 0 42\n+-2     1      4      -3    \n+1      1      0      -2    \n+1      0      2      -4    \n+-3     1      4      -4    \n+1      -1     2      -3    \n+1      0      2      -4    \n+1      0      1      -2    \n+-3     2      3      -2    \n+0      -1     3      -3    \n+1      -1     -2     0     \n+2      0      0      -3    \n+-2     -1     3      -1    \n+-1     0      2      -1    \n+1      0      1      -2    \n+0      -1     4      -4    \n+-3     2      4      -3    \n+-1     1      3      -4    \n+2      0      -1     -1    \n+2      1      -1     -2    \n+-3     2      3      -2    \n+1      -1     2      -3    \n+2      -4     2      -3    \n+1      -2     4      -4    \n+-2     1      3      -2    \n+0      -1     3      -3    \n+1      1      -1     -2    \n+2      0      -2     -1    \n+-3     1      3      -1    \n+0      0      3      -3    \n+1      -1     0      -1    \n+2      -1     1      -4    \n+-3     2      4      -4    \n+-1     -1     5      -5    \n+2      -2     1      -2    \n+2      0      0      -3    \n+-3     0      4      -2    \n+1      -2     3      -3    \n+3      -4     1      -3    \n+1      -3     2      -2    \n+-2     1      3      -2    \n+1      -3     3      -3    \n+1      -1     0      -1    \n+3      -2     -2     -2    \n+-3     0      3      -1    \n+0      -1     3      -3    \n+2      -1     0      -1    \n+2      -3     2      -3    \n+-2     1      4      -2    \n+1      -1     1      -2    \n+2      -1     0      -2    \n+2      2      -3     -2    \n+-3     1      4      -3    \n+1      -2     2      -2    \n+2      -2     2      -3    \n+2      -2     1      -3    \n+-3     2      3      -2    \n+1      -4     1      1     \n+2      -1     -1     -1    \n+1      -1     2      -2    \n+-3     0      4      -1    \n+-2     1      3      -3    \n+1      -1     0      -1    \n+0      0      3      -4    \n+-3     1      4      -4    \n+-1     
-1     4      -4    \n+3      -2     -1     -3    \n+2      -1     1      -3    \n+-2     1      4      -4    \n+1      -2     2      -2    \n+3      -6     2      -4    \n+1      -2     2      -2    \n+-2     1      3      -1    \n+0      -3     4      -3    \n+2      -1     -1     0     \n+2      -1     0      -2    \n+-1     -1     3      -1    \n+0      -1     2      -1    \n+2      -1     0      -2    \n+1      -2     3      -2    \n+-3     1      4      -3    \n+1      0      3      -4    \n+2      2      -1     -3    \n+2      1      -2     -3    \n+-3     2      3      -2    \n+2      -1     -1     -2    \n+3      -5     1      -2    \n+2      -3     2      -3    \n+0      0      2      -1    \n+0      -2     3      -2    \n+1      0      1      -2    \n+2      -2     -2     0     \n+-2     1      3      -2    \n+2      -1     2      -4    \n+2      -1     0      -1    \n+2      -1     1      -4    \n+-2     2      3      -3    \n+-1     -2     5      -4    \n+1      -2     3      -4    \n+2      -1     1      -3    \n+-2     -1     4      -3    \n+1      -3     3      -3    \n+2      -7     3      -3    \n+1      -2     2      -2    \n+-2     0      3      -1    \n+1      -3     3      -2    \n+0      -1     3      -2    \n+3      -2     -2     0     \n+-3     -1     4      -1    \n+0      -1     3      -2    \n+1      -2     2      -2    \n+2      -3     3      -4    \n+-3     0      4      -2    \n+0      2      0      -2    \n+2      -2     0      -2    \n+2      -2     -2     0     \n+-2     0      4      -3    \n+1      -2     2      -2    \n+3      -4     0      -2    \n+1      -2     2      -3    \n+-3     1      3      -1    \n+-1     -2     0      2     \n+3      -2     -1     -2    \n+2      -1     1      -3    \n+-2     -1     4      -1    \n+0      0      2      -2    \n+2      -1     -3     0     \n+1      -1     2      -2    \n+-3     0      5      -3    \n+-1     1      3      -4    \n+2      0      -1     -2    \n+3      1      -1     -4    
\n+-3     3      3      -3    \n+1      -1     1      -2    \n+3      -3     1      -4    \n+2      -2     1      -4    \n+-2     1      2'..b' -3     -1    \n+-1     1      -1     1     \n+1      0      -4     1     \n+1      -1     -4     2     \n+-1     1      0      0     \n+-4     2      -3     3     \n+0      -2     0      1     \n+1      0      -4     1     \n+0      1      -2     1     \n+-3     0      -1     3     \n+0      1      -2     1     \n+-1     -1     -4     3     \n+-1     2      -1     0     \n+-4     2      -1     2     \n+1      -1     0      0     \n+1      1      -3     1     \n+2      -1     -1     -1    \n+-3     1      -2     3     \n+2      -2     -1     0     \n+1      -2     -1     1     \n+1      -3     2      0     \n+-3     1      -1     3     \n+2      -4     -1     1     \n+-1     0      -4     3     \n+2      -4     -1     1     \n+-4     -1     -2     4     \n+2      -2     -1     1     \n+0      -1     -3     2     \n+1      -4     1      1     \n+-3     -1     -1     3     \n+1      -1     -3     1     \n+2      -1     -5     1     \n+1      0      -4     1     \n+0      0      -1     1     \n+1      0      -3     1     \n+1      -1     -3     1     \n+1      -2     -1     2     \n+-4     2      -1     2     \n+1      -2     -1     1     \n+1      -1     -5     2     \n+1      -2     -2     2     \n+-1     -3     1      2     \n+0      0      -2     2     \n+0      -1     -5     3     \n+-1     -2     -2     3     \n+-3     0      0      2     \n+2      -1     -1     -1    \n+1      1      -3     1     \n+2      -1     -2     0     \n+-1     0      -1     2     \n+1      -1     -2     1     \n+2      -2     -2     1     \n+1      -1     -1     1     \n+-2     1      -2     3     \n+0      -3     0      1     \n+-1     0      -6     3     \n+0      -3     2      0     \n+-2     -2     0      3     \n+1      -2     -3     2     \n+1      -2     -4     3     \n+0      -3     -2     3     \n+-1     -2     -1     2     \n+1      -1 
    -1     0     \n+1      1      -4     1     \n+2      -1     -2     0     \n+-2     2      -2     1     \n+1      0      -2     1     \n+1      0      -3     1     \n+1      -3     1      0     \n+-5     3      -4     2     \n+1      -3     -1     2     \n+-1     2      -4     2     \n+1      -2     -2     2     \n+-3     2      -2     2     \n+0      0      -2     2     \n+-2     0      -5     4     \n+0      -1     -1     2     \n+-5     4      -2     1     \n+1      -3     1      0     \n+0      0      -3     2     \n+2      -1     -1     0     \n+-3     0      0      2     \n+1      -4     1      1     \n+1      -2     -2     2     \n+0      -4     0      2     \n+-3     0      -1     3     \n+1      -4     0      1     \n+-1     -1     -4     3     \n+1      -3     -1     1     \n+-3     -1     0      3     \n+1      -2     -2     2     \n+-1     -2     -3     3     \n+0      -4     -1     3     \n+-3     -1     1      3     \n+1      -2     -1     1     \n+1      -1     -5     2     \n+0      -1     -1     1     \n+-2     0      -1     2     \n+0      -1     -1     1     \n+0      0      -3     2     \n+1      -2     -2     2     \n+-5     2      -3     3     \n+0      -3     -1     2     \n+0      -1     -5     3     \n+0      -3     -1     3     \n+-1     -3     -1     3     \n+0      -1     -1     2     \n+-1     -1     -5     3     \n+-1     -3     0      2     \n+-4     -1     -1     3     \n+FORMAT: at.conf START PROFILE 1 12 7 s C+G: 42 100\n+2      -3     0      0      \n+1      4      -4     -2     \n+6      -4     -1     -6     \n+8      -9     1      -9     \n+5      6      -14    -6     \n+6      -1     0      -11    \n+18     -100   -100   -100   \n+-38    -38    -39    18     \n+-49    -46    21     -44    \n+-3     -13    12     -10    \n+1      9      -4     -8     \n+-4     -7     6      2      \n+FORMAT: at.conf STOP PROFILE 1 12 6 s C+G: 42 100\n+0      0      2      -2     \n+1      1      -3     1      \n+-4     2      -1     3      
\n+-59    -62    -50    18     \n+10     -66    10     -68    \n+15     -64    -2     -70    \n+4      -8     -1     1      \n+2      -2     -2     0      \n+3      -1     -4     0      \n+2      -2     -3     1      \n+2      -3     -3     2      \n+2      -2     -3     2      \n'
b
diff -r 000000000000 -r fcc558568020 test-data/assembly.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assembly.fasta Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,73 @@
+>contig_1
+CGATTAACCATGTGTACAACATGACCAATTATGGCCGATCTTGCTTGTTCAGGAGTCTCA
+CTCATCATTCCAAAATCAAGAAAAGCAAGCTTCCCGTCAGGTGTAGCTAAGAGATTCCCT
+GGGTGAGGATCTGCATGAAAATAGCCGTACTCAAGCAGCTGTCGAAGACTGCACTGTATG
+CCGGTATTCACCAGATCCAGAACACTGAGCCCTTGACTCTCAACGGCATCTTGCTCATTT
+AATTTAACACCTTCAACCCACTCCATTGTTAGCACCTTTCCACTCGTGTAATCCCAGAAA
+ATATCTGGGACAAGGATATCTTCCTTGTCTCCATATAATTTTTTAA
+>contig_2
+CCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCC
+GGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCAT
+ACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCC
+CGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATG
+GGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCC
+GATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTAC
+AACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTG
+TACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACA
+ACTGTTAAATTATGTGCGCTTTGATGATTAAAAACACGGGGTAATTGAACTCAAGGGATG
+GTTGAGAATTGTAATATTTCTGTAAGTGTCGGGGGATGGTTGAAATGCTTTTTATTATGA
+AGTGTTAAAATGTAAGATAAA
+>contig_3
+CGGGGGCAAGGTTTACATGAAAGTACATCTACAGGACCTAGATAAGCATCATCACCTGAC
+AAAAAGCCATACATAGTAACACTTGCTAGTTGATCTGTGAAAATGGTGCACGAGTCGTAA
+CAGACGGCATTAGTTCCTTTCCTGTAACAGCAGATTCGTATGAATGGTTTTCTCGAAATT
+CTTCTTCAAGTGCATCTATAGGCATGGCACGAAGTGACTCTATTGTGCCTTTGCTGGGAA
+TATCCGGCTCGCTCCTAACTGGCGTGGACCCTGTAGGCTCGTAATCCATGTATTTTTGCC
+TGAAAGTATCATTCGTGTGCTGTTCGATACAGGATACCTGCTGGGAATGATCTCTCTTAA
+GGTTCTCAATTGTTTCTGAATGAGCTCTAGCAGTTGTCAGAATTTCAGAAACAGATGCCT
+TCTCCTGCTCTGACAAGCCAGCAACAACAACTCCTTCATCCACA
+>contig_4
+CTGACGATGTTCATATTCATGCCACTCTAAATGTATGCCATCCATGTTGAGGAGAAATGC
+TTGTGTAAAGAAGAAACTGGAGTCAAGCGGTCCCGGTTTTACAGTTGAACGTTGCTCTAA
+ATTAGTCAAAGTACCCGACTACCCGCTCATCAAACTGGCGAGCTCGTTTTCCTCGCCGCC
+CTACTCGGTGTCTTGGCGGGGCACATGGGGGTGGCGTGTGGCGTGCGGGGATGCGACAAC
+ATCATAAATTCATAATCGAAGG
+>contig_5
+GTGAGAGTGTCGCCTTTTCCATGCTACCCCTTTCCAGATCTGACTTGTTCGAACCTTCCA
+TCATCCGGTTCGACGTCAGCAGTCGGTCCTTCTTCTCCAAGCACGAGAGCAGACGGTGCG
+AGAGCGAGCAAAGACTCGCCTGCCGCGCCTCTCGTGACCTGGCTCGCATCTCCAGGATTC
+ATGGCTCCTTAGCTCGCCCTTCTTCGATACTTCGCTAGACGCCCGAGGACGTCATCGAAG
+GCGAGGGTCGGCACAACGGAGTGCTCCTTGCATGATGTCGGGGCCTGGCATCACGCGGCA
+TCGGCCAAATCGTCCCTCTCGACGTGCGTCACGGAGAGGGCCGAGCGCCGGGACGACCGC
+GTGGAAGAGCCCGCGGGATGCGGGATCCCGCTTGACGTGTGTTTGTGGCGGACCACCG
+>contig_6
+TATAACAATTCAAAATATTCCTATGAAAACCAAGACCAGAGGCAACACCGGTATAACCAA
+AGCCAGAGGCAATATCCCTTTATTCTTCGGGAACAGAAAAATACACGATATGCTACAGAC
+AAAAAAATAAATCCAAACCATTCTTCTGTGCATAAACATTGTAAAAGTTTATTAGACCGC
+TGTAAAACTCGTAAATGAAATCCCTGGCAATTGAATCTGTTAAACCTGCTCC
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT
+AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG
+TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC
+AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGCG
+AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACG
+>contig_8
+TGGCATGAAGTCGGCTCGACTCGTGCCCGCTCGTCTATAGACGAATTGAGCTAGATTCAC
+TCATTCCATGAACTCGACTCGTTCATGAGTCGGCTCGTAGTTGTCCGGACTCGCTCCATG
+GCCAGCTCTACAACAGACTGCCTGTATGTAGATGATTGTATTGATTTGTTCTTCTCTTGT
+TTAAATCCAACCAAATATCAACATGATATTTGCAATTTC
+>contig_9
+ACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTT
+TGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCC
+TGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAG
+AATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAAC
+AGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAA
+ACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGC
+ACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGA
+>contig_10
+GGGGCATTGCCGCCGATTAATTCGAGCGCGAGGTTTCGCGTATCAGGCGGAATTGGGATT
+TGGCGCCACAAAGAGCGCCACCTATGATTTCTGTGGCGGACGACTTCACGAACTCGGACG
+GGACTATCTAACCAATCTTCCAAGTTCATTGCGACATCAGTATAAGGGCGTGATGAAGGT
+TCGCTATCGTCGCTTGAGTTATTGATGGGGCCCAAATTGAGATCGAGGTTCATTGTAGTG
+GTGTTCTCTTCTGCCATTTGAGAATTTCACAAGTTCTAACAAACGAAAACGCAAATCTTC
+GGGACTAATATGCAGAATTTCCCTAAATAGAAGGGGTTTAAAA
b
diff -r 000000000000 -r fcc558568020 test-data/assembly_tgf.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/assembly_tgf.fasta Thu Jun 08 12:44:09 2017 -0400
b
b'@@ -0,0 +1,271 @@\n+>contig_1\n+CGATTAACCATGTGTACAACATGACCAATTATGGCCGATCTTGCTTGTTCAGGAGTCTCA\n+CTCATCATTCCAAAATCAAGAAAAGCAAGCTTCCCGTCAGGTGTAGCTAAGAGATTCCCT\n+GGGTGAGGATCTGCATGAAAATAGCCGTACTCAAGCAGCTGTCGAAGACTGCACTGTATG\n+CCGGTATTCACCAGATCCAGAACACTGAGCCCTTGACTCTCAACGGCATCTTGCTCATTT\n+AATTTAACACCTTCAACCCACTCCATTGTTAGCACCTTTCCACTCGTGTAATCCCAGAAA\n+ATATCTGGGACAAGGATATCTTCCTTGTCTCCATATAATTTTTTAA\n+>contig_2\n+CCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCC\n+GGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCAT\n+ACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCC\n+CGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATG\n+GGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCC\n+GATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTAC\n+AACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTG\n+TACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACA\n+ACTGTTAAATTATGTGCGCTTTGATGATTAAAAACACGGGGTAATTGAACTCAAGGGATG\n+GTTGAGAATTGTAATATTTCTGTAAGTGTCGGGGGATGGTTGAAATGCTTTTTATTATGA\n+AGTGTTAAAATGTAAGATAAA\n+>contig_3\n+CGGGGGCAAGGTTTACATGAAAGTACATCTACAGGACCTAGATAAGCATCATCACCTGAC\n+AAAAAGCCATACATAGTAACACTTGCTAGTTGATCTGTGAAAATGGTGCACGAGTCGTAA\n+CAGACGGCATTAGTTCCTTTCCTGTAACAGCAGATTCGTATGAATGGTTTTCTCGAAATT\n+CTTCTTCAAGTGCATCTATAGGCATGGCACGAAGTGACTCTATTGTGCCTTTGCTGGGAA\n+TATCCGGCTCGCTCCTAACTGGCGTGGACCCTGTAGGCTCGTAATCCATGTATTTTTGCC\n+TGAAAGTATCATTCGTGTGCTGTTCGATACAGGATACCTGCTGGGAATGATCTCTCTTAA\n+GGTTCTCAATTGTTTCTGAATGAGCTCTAGCAGTTGTCAGAATTTCAGAAACAGATGCCT\n+TCTCCTGCTCTGACAAGCCAGCAACAACAACTCCTTCATCCACA\n+>contig_4\n+CTGACGATGTTCATATTCATGCCACTCTAAATGTATGCCATCCATGTTGAGGAGAAATGC\n+TTGTGTAAAGAAGAAACTGGAGTCAAGCGGTCCCGGTTTTACAGTTGAACGTTGCTCTAA\n+ATTAGTCAAAGTACCCGACTACCCGCTCATCAAACTGGCGAGCTCGTTTTCCTCGCCGCC\n+CTACTCGGTGTCTTGGCGGGGCACATGGGGGTGGCGTGTGGCGTGCGGGGATGCGACAAC\n+ATCATAAATTCATAATCGAAGG\n+>contig_5\n+GTGAGAGTGTCGCCTTTTCCATGCTACCCCTTTCCAGATCTGACTTGTTCGAACCTTCCA\n+TCATCCGGTTCGACGTCAGCAGTCGGTCCTTCTTCTCCAAGCACGAGAGCAGACGGTGCG\n+AGAGCGAGC
AAAGACTCGCCTGCCGCGCCTCTCGTGACCTGGCTCGCATCTCCAGGATTC\n+ATGGCTCCTTAGCTCGCCCTTCTTCGATACTTCGCTAGACGCCCGAGGACGTCATCGAAG\n+GCGAGGGTCGGCACAACGGAGTGCTCCTTGCATGATGTCGGGGCCTGGCATCACGCGGCA\n+TCGGCCAAATCGTCCCTCTCGACGTGCGTCACGGAGAGGGCCGAGCGCCGGGACGACCGC\n+GTGGAAGAGCCCGCGGGATGCGGGATCCCGCTTGACGTGTGTTTGTGGCGGACCACCG\n+>contig_6\n+TATAACAATTCAAAATATTCCTATGAAAACCAAGACCAGAGGCAACACCGGTATAACCAA\n+AGCCAGAGGCAATATCCCTTTATTCTTCGGGAACAGAAAAATACACGATATGCTACAGAC\n+AAAAAAATAAATCCAAACCATTCTTCTGTGCATAAACATTGTAAAAGTTTATTAGACCGC\n+TGTAAAACTCGTAAATGAAATCCCTGGCAATTGAATCTGTTAAACCTGCTCC\n+>contig_7\n+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT\n+AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG\n+TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC\n+AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGCG\n+AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACG\n+>contig_8\n+TGGCATGAAGTCGGCTCGACTCGTGCCCGCTCGTCTATAGACGAATTGAGCTAGATTCAC\n+TCATTCCATGAACTCGACTCGTTCATGAGTCGGCTCGTAGTTGTCCGGACTCGCTCCATG\n+GCCAGCTCTACAACAGACTGCCTGTATGTAGATGATTGTATTGATTTGTTCTTCTCTTGT\n+TTAAATCCAACCAAATATCAACATGATATTTGCAATTTC\n+>contig_9\n+ACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTT\n+TGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCC\n+TGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAG\n+AATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAAC\n+AGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAA\n+ACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGC\n+ACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGA\n+>contig_10\n+GGGGCATTGCCGCCGATTAATTCGAGCGCGAGGTTTCGCGTATCAGGCGGAATTGGGATT\n+TGGCGCCACAAAGAGCGCCACCTATGATTTCTGTGGCGGACGACTTCACGAACTCGGACG\n+GGACTATCTAACCAATCTTCCAAGTTCATTGCGACATCAGTATAAGGGCGTGATGAAGGT\n+TCGCTATCGTCGCTTGAGTTATTGATGGGGCCCAAATTGAGATCGAGGTTCATTGTAGTG\n+GTGTTCTCTTCTGCCATTTGAGAATTTCACAAGTTCTAACAAACGAAAACGCAAATCTTC\n+GGGACTAATATGCAGAATTTCCCTAAATAGAAGGGGTTTAAAA\n+>contig_11\n+CATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAA
TC\n+TCCCGTCGTTGTTGTTGGAGGCGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGT\n+TCTAGGATCT'..b'TCAGACC\n+GCTCGAATTCGAGTCGAACATCTGATTAGGGAAGAGAAGATGATGGCTGCATATGATCTC\n+CTCGATATATACTGCGAGCTTGTTGTTGCACGTCTTCCAGTAATTGAGTCACAAAAGAAT\n+TGCCCCATTGACCTGAAAGAAGCAATTGCAAGTTTAGTATTTGCAGCACCAAGATGTGGA\n+GATGTACCCGAGTTTCTTGATGCAAGGAAGCAATTTTCAGCTAAATATGGAAAAGATTTC\n+ACTACTGCAGCCACAGAACTTCGTCCACAGTGCGGTGTAGGCCGCATGTTGGTTGAAAAA\n+TTATCTGCAACAGCGCCTGATGTACAGAGCAAAACTAAAATCTTGAATGCAATAGCTGAG\n+GAGCACAATGTTAAATGGGAACCCACATCATTTGGAGAGAACGACTCTGCACCTCTCAAT\n+GACCGACTGACTGGACCAAGTTCCTTTACGAAGGAGAGAGAACAGTACCCTGAACCTCCT\n+CATTTTGAAGCTGTACAAGTCCAAGCTCACCCGAGCAACAACACCTTGCATAGCTCGCCA\n+TCAAATTCTTCCCAGCAGGATGATAGAACCTCAGTTTTTGTTGAAACTTCTACTTTCAAG\n+ACACCATCG\n+>contig_30\n+AAGGCTAGTGGACTTTTTTCAAGTAAAGATTCTGTTGATTCAACAGATAGCCACATGGAA\n+AATGTATGCTCTGATATTTTATCAATGAGCATCAACAAAAACCAAATTTTAGAGAATGGC\n+CATGTTCATAACAATAGAGGAGCAGCAATATTTGAGTTGTCTGGAGCTGCTACAAATGTA\n+ATTGAGGACGTTGGTTTGTCAGATGTCCAGTCTGACACCAGATTGGGAATGGCAAGTCAA\n+GTTTGCCAAGTTGATATGCATGAACCAGAGGGCGGTTTGTTGTCTTTTAAGAACCAAAGA\n+TTTAAGGATGCTGAGGTTGCTACCAATATAACACATGATTATTGTCATGTATCTCATCTG\n+TTAAAGCATTCTAACGTTCAAGTTCCTAAGTACATTAGTGGTAATGGTTCAGCCACTGTT\n+GATCTGAATAGGCAGACTGTAGATAGGAATAACAATTTTAAAGTTTCAACATCTAATT\n+>contig_31\n+CCGTACTCAAGCTTTTAGTTTTATCCTCCTCATTATGACGTTTGCGTCTGTTTTCTTTTT\n+CAGACTTCTGGCTGCCTCTGTTAGTTAATCTTTCTGTATGCATCACATTTCTTTCGTCTC\n+TTCTTTTGTACTCTTTCTCAGAGTCACTGCTCATTCGCCGTTCTGTTTTCCTTTTCCTGT\n+GGACCTGTCCTTCATCGTTAACAAGCTGTTTTCTATCTATTTCGGATTTGACTCTGCCTC\n+TTTCAGAAGTTTTACCCTTTCTATATGACTCTCTGGCTTGCCTCTTATTTGATCCATTAT\n+ATGAATCCACACCTAAATGGCCATCTCTTTTTCTGTTGCTGTAATGTCGTCTTTCATTTT\n+CTCCTTCTCTCACTCTCTTGCTGCCTTCAGTCTTTTCCCATATACTCTTTAACTTGACAC\n+CACTTTCCCTATACTTTTCATCCTTATCCTGGACTAAATTGTCATTCTTACTATCCACTT\n+TTCTCCGGCAACTTACCTTCTGAAAATCATTTGAAGTATGTCTAACATCCATACGTTCAG\n+AAACCTCATTTATTTGCCGATGGTATTCTGGAAAATTAGAAATCTTTTCAATGTCACCAT\n+GCCCTGATGCACTATCAGCCACTCCAGAGCCACTGGCAACTTCTTTACCTGCATCTCCCT\n+TGTTATTTCTTCCGAGCATTATCTCATCATAGCTTAATGGCCTC
GTTCTTGCACAAGTGC\n+CCTCCAAAAGATCATCCTGAGACACTCTACTTTCTAATTTCTCTATGTGTGGGCGGGATC\n+TAGTCATGGCGCTAACGGACTTCTACTATGTTTACGAGCATGAGGAACTGCCAGAGCGCG\n+GCCGGAGAGAATAAGCGTCGCCTGGTGGTTTTAC\n+>contig_32\n+CACCGACACATCACAGAAATGGGGCTTTCCATGTTACTTCATTCACGAATGCCTTTGTCA\n+TATTGGGATGATGCTTTTTCTGCTGCAATTCACACAATCAACCGCCTACCTACTCTGATT\n+CTCAACAAAGTTTCTCCTTTTGAAGTACTCTTTGGACGCGCACCGGTGTATTCTAACTTT\n+CATCCTTTCGGCTGCAGAGTTTTTCCCTATGTTCGACCCACTGCTGAACACAAACTTGCT\n+CCACGCAGTCTGCCTTGTATCTTCATCGGCTACAGCGCTCACTATAAAGGC\n+>contig_33\n+TTTTAATTCTATAGGGCGAAGTAATGCTGTCTTTTCAGCAACCTTTTGCCAATCGACAAA\n+GTCAATGAGCTCTTCATCCATTGATTCTTTTGCAGCAAGAAGAAGGATTTTCCTCCTTTC\n+TGCTTGAGTTAGTCGCTGAAGATGAAATATTCGATCCATTCTACCAGGACGCAGTAAAGC\n+CCCATCGATTTGTTTAAGATTCCGAGTAGTAGCCATCAAAACTACCCCATCTTGTTTCTC\n+AAACCCATCAAGCTCCACCAGAAGTTGATTGATGAAAGATTCGTGATCCAGCTTTTTGGT\n+CTGAAGGTCCTTCCCGCGGACACCAGCAAAGAGGTCAAAGTCTTCCACAAATATAATGAC\n+AGGAGCCAGATCCCGTGCTGCTTGAAACAATTCTCTAACGTTTGATGCACCTTGGCCCAC\n+AAACAGACCAGCTTCCAGCTGTTGGGCTTTTACTTCAACTAAAGGAACCTTCGCTTCCGC\n+TGCTATAGCCATTGCTAGGGATGTTTTACCTGTTCCTATCTCACCAACAATAAGAACACC\n+CCGAGGTGCCCGAGCTCCCATTTCTTGGAATGCACGAGGATTTTGTAGAAATGCCACAAC\n+TTCATTAATTTCTTC\n+>contig_34\n+ACCCGTTGGACCCTCTCGTGATTCAAACTCAACTTGTTTCATAAAATAGCACAACACTGC\n+TCGGTACTGAAAACTGGAGGATCTTGTAGGTCAATAATACAAGTTACAACTCTTGTAATT\n+CCTCAACTGAGTGCACAGTCCAAAGTGGCCAATGGCATCAAGTAACAGATTACCAATAAG\n+ATAACAGTAGAAGTCAGATGAATATTCCTGTCTTGATTTTGTTTTCTCAACTGCTTCTAC\n+TTCCAATTAAGGAGAGACACAGTCGAAAATAGAATAAACCAAATATTTATCTGTAGGCTT\n+CCGGCAGGCGGTAACCACTGATCACTAAATTGTCGGCGAACATCTTGCCAGTTTTGGGCA\n+TCACATGCCAATGCATTGTTAAGTTGAAGTCTTTACCACGTAGATTGCTTCCTTGGTCAA\n+TAAAACGATACTTGTTAGTTGTCTGGATCTGAAATATTGCTTGCTCTTTGGAGGGTATAA\n+TAAGATCCCACAAGGAGACCTGATTCAGCGCATTTGTTGGGGTTTCA\n+>contig_35\n+AATTCACAGCCTTCTCTTTCATCTCGAGATTGTCTACCTTCTGCAGATCGATATGTCGAT\n+GCTTTACAGAGGGATATCGAGGAAGGATAAACCTAGAGGACGTCAGCACGGGTTGACTCA\n+ACAGAAAAGGCAAGAGATAAAGGAAGCTTTTGAACTGTTTGACACCGATGGATCTGGAAC\n+TATTGATGCGAAAGAGTTGAACGTAGCAATGAGGGCTCTCGGTTTTGAAATGTCAGAAGA\n+GGAAAT
TACGAGAATGATAGCTGAAGTAGACAAAGACGGGAGTGGGGCGATTGACTTTGA\n+CGAGTTTTGTCACATGATGACAGCCAAATTCGGAGAAAGGGACACCAAAGAGGAGCTTAC\n+CAAGGCTTTTCAGATTATCGACAAAGATCAAAA\n'
b
diff -r 000000000000 -r fcc558568020 test-data/output.pttgf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.pttgf Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,6 @@
+<html><head><h3>Targeted gene families: 1 items</h3></head>
+<body><p/><table cellpadding="2">
+<tr><b>Directories</th></b>
+<tr bgcolor="#D8D8D8"><td><a href="752" type="text/plain">752</a>
+</td></tr>
+</table></body></html>
b
diff -r 000000000000 -r fcc558568020 test-data/target_orthos.ids
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/target_orthos.ids Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,5 @@
+213
+9300
+752
+4632
+4732
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cds Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,8 @@
+>Gene.1::contig_1::g.1::m.1 type:internal len:115 contig_1:344-3(-)
+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGGTCATGTTGTACACATGGTTAAT
+>Gene.2::contig_2::g.2::m.2 type:5prime_partial len:170 contig_2:2-511(+)
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAATTATGTGCGCTTTGATGATTAA
+>Gene.3::contig_3::g.3::m.3 type:5prime_partial len:126 contig_3:463-86(-)
+GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAACTAG
+>Gene.4::contig_9::g.4::m.4 type:internal len:132 contig_9:2-394(+)
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cleaned.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.cds Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,26 @@
+>contig_1
+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAAT
+GGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATA
+CCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCT
+ACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGG
+TCATGTTGTACACATGGTTAAT
+>contig_2
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
+CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
+TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
+GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
+GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
+GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
+CTGTTAAATTATGTGCGCTTTGATGAT
+>contig_3
+GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGC
+TCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTT
+TCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACA
+ATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTAC
+AGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
+>contig_9
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
+GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
+AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
+GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
+ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cleaned.nr.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.cds Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,43 @@
+>contig_1
+NTTAAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAAC
+AATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGA
+ATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTA
+GCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAAT
+TGGTCATGTTGTACACATGGTTAATCGN
+>contig_10
+ATGGCAGAAGAGAACACCACTACAATGAACCTCGATCTCAATTTGGGCCCCATCAATAACTCAAGCGACGATAGCGAACC
+TTCATCACGCCCTTATACTGATGTCGCAATGAACTTGGAAGATTGGTTAGATAGTCCCGTCCGAGTTCGTGAAGTCGTCC
+GCCACAGAAATCATAGGTGGCGCTCTTTGTGGCGCCAAATCCCAATTCCGCCTGATACGCGAAACCTCGCGCTCGAATTA
+ATCGGCGGCAATGCCCCN
+>contig_2
+NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA
+CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC
+GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC
+CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGAT
+>contig_3
+NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG
+AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA
+CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT
+TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
+>contig_5
+NNCGGTGGTCCGCCACAAACACACGTCAAGCGGGATCCCGCATCCCGCGGGCTCTTCCACGCGGTCGTCCCGGCGCTCGG
+CCCTCTCCGTGACGCACGTCGAGAGGGACGATTTGGCCGATGCCGCGTGATGCCAGGCCCCGACATCATCAAGGAGCACT
+CCGTTGTGCCGACCCTCGCCTTCGATGACGTCCTCGGGCGTCTAGCGAAGTATCGAAGAAGGGCGAGCGGAGCCATGAAT
+CCTGGAGATGCGAGCCAGGTCACGAGAGGCGCGGCAGGCGAGTCTTTGCTCGCTCTCGCACCGTCTGCTCTCGTGCTTGG
+AGAAGAAGGACCGACTGCTGACGTCGAACCGGATGATGGAAGGTTCGAACAAGTCAGATCTGGAAAGGGGCATGGAAAAG
+GCGACACTCTCACN
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC
+CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT
+CATCCAATGAAACATGTAGCAACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA
+AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN
+>contig_9
+NNACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCA
+TGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCT
+CAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGA
+ACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGC
+TGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGAN
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cleaned.nr.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,22 @@
+>contig_1
+XKKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLL
+ATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVNR
+>contig_10
+MAEENTTTMNLDLNLGPINNSSDDSEPSSRPYTDVAMNLEDWLDSPVRVREVVRHRNHRWRSLWRQIPIPPDTRNLALEL
+IGGNAP
+>contig_2
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK
+WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS
+QLLNYVRFDD
+>contig_3
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG
+TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
+>contig_5
+XGGPPQTHVKRDPASRGLFHAVVPALGPLRDARREGRFGRCRVMPGPDIIKEHSVVPTLAFDDVLGRLAKYRRRASGAMN
+PGDASQVTRGAAGESLLALAPSALVLGEEGPTADVEPDDGRFEQVRSGKGHGKGDTLT
+>contig_7
+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG
+NESGEISGKKNTRKGKGDX
+>contig_9
+XLRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGR
+TAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAVX
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cleaned.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,13 @@
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_2
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
+VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
+LLNYVRFDD
+>contig_3
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGT
+IESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
+>contig_9
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
+AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cleaned_tgf.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned_tgf.cds Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,102 @@
+>contig_1
+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAAT
+GGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATA
+CCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCT
+ACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGG
+TCATGTTGTACACATGGTTAAT
+>contig_11
+ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGG
+CGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCC
+AAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCAT
+GATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCCAGAAACTCCCGCTGTGGCTA
+T
+>contig_18
+ACAGAGGTGAGAAGATGGAACAAGCAGGAAGATTGGGGTAGAAAGACATGGAAAGAAGCTAAGGAATCCACATTGCCGAA
+AATAGTAGGTGAAGGGATCTATGGAGTTGGTCCCATTTTAGCTGCACTCTCATCCGGGCGAAGAGAACTCTACGCGTTGT
+ACGTTCAGGAAGGTTTGGATTTGAGTAGTAACAGTAAGAAGAAGGACAAGAAACGGTTCGAGAGAGTTTTGAAAATGGTG
+GAAAAGATTGGATTAAGCAAAAAAGAGGTATCCAAACACGACCTCAACATGGTCGTTGATAATAGGCCTCACCAGGGCTT
+GCTTCTTGATGCTTCGGCACTTGAAATGGTTAGTATAAAGGAATTAGACCCCGTTTCCATTGATGGAGAGAAGTGCCCGC
+TTTGGTTGGCATTGGATGAGGTTACC
+>contig_2
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
+CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
+TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
+GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
+GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
+GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
+CTGTTAAATTATGTGCGCTTTGATGAT
+>contig_20
+GGCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGA
+CGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTC
+TTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCAT
+TTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGATCA
+>contig_21
+GTTTCCATAGTGAATCAGGAACCTGTTCTATTCTCGGTTTCCATTGGAGAAAATATTGCTTACGGACTCCCAGATGATTA
+TGTTTCCAAGGTCGATGTGATAAAAGCTGCTAAAGCAGCTAATGCTCATGAATTTATCATTGCGTTGCCACAGGGTTATG
+ACACATTAGTTGGTGAGCGTGGTGGGTTGTTAAGTGGAGGACAAAGACAGAGGGTAGCCATTGCAAGGGCTCTGCTCAAG
+AATGCTCCAATCTTGATTCTTGATGAGGCTACCAGTGCTTTGGACGCAGTCAGTGAACGTCTGGTTCAGGATGCTTTGAA
+CCGTTTGATGAAAGGAAGAACGACTTTAGTGATTGCTCACAGGCTGAGCACCGTTCAAAATGCTGATCAAATTGCTTTAT
+GCTCTGATGGGAAGATTTCAGAACTGGGGACACACTCTGAGTTGTTAGAGCAAAAGGGTCTTTATGCCTCACTGGTTGGC
+ACCCAAAGACTTGCATTCGAG
+>contig_24
+AAAACCCGTGAATTAGAAATCGAACAAATCTTGTTAAAATCCAAGGATTTGGAAAGCGAATTGGAGAGCAAGGGTAGTAT
+GTTTATAAAGGAGACTGAGGCACTTGTTGCAGAAAACTCGAAACTCAGTCAGGAGTTGGGTGCATTTAAATCCGAGCTAA
+ACGATATACAGATGAAATTGAACGTTGTTTCATCTGAGAAAGACGGCACTGTTGAAGAACTAACTAGTGCAAGAAAAGAA
+ATAGAAGAGCTGACTCAGAAGCTTGCTTCTGAAGGACAAAAGCTGCAGTCTCAGATATCTTCTATAATGGAAGAGAACAA
+TTTACTTAAC
+>contig_29
+ATGAAGAAATCGAAGCTTCTGCAGAATTCGAAGGACTTACTTTCTAGGAGCTTCAATCCTGCTAAATGCAAAACGTCTCT
+GAGGCTGGCGGGTTCAAGGTTGAAGCTATTGAGAAACAAGAAAGAGGTGCAACTGAAGCAGATGAAGCGGGAAATAGCAC
+AGTTGCTCGAGTCTGGACAGGATCAGACCGCTCGAATTCGAGTCGAACATCTGATTAGGGAAGAGAAGATGATGGCTGCA
+TATGATCTCCTCGATATATACTGCGAGCTTGTTGTTGCACGTCTTCCAGTAATTGAGTCACAAAAGAATTGCCCCATTGA
+CCTGAAAGAAGCAATTGCAAGTTTAGTATTTGCAGCACCAAGATGTGGAGATGTACCCGAGTTTCTTGATGCAAGGAAGC
+AATTTTCAGCTAAATATGGAAAAGATTTCACTACTGCAGCCACAGAACTTCGTCCACAGTGCGGTGTAGGCCGCATGTTG
+GTTGAAAAATTATCTGCAACAGCGCCTGATGTACAGAGCAAAACTAAAATCTTGAATGCAATAGCTGAGGAGCACAATGT
+TAAATGGGAACCCACATCATTTGGAGAGAACGACTCTGCACCTCTCAATGACCGACTGACTGGACCAAGTTCCTTTACGA
+AGGAGAGAGAACAGTACCCTGAACCTCCTCATTTTGAAGCTGTACAAGTCCAAGCTCACCCGAGCAACAACACCTTGCAT
+AGCTCGCCATCAAATTCTTCCCAGCAGGATGATAGAACCTCAGTTTTTGTTGAAACTTCTACTTTCAAGACACCATCG
+>contig_3
+GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGC
+TCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTT
+TCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACA
+ATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTAC
+AGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
+>contig_30
+AAGGCTAGTGGACTTTTTTCAAGTAAAGATTCTGTTGATTCAACAGATAGCCACATGGAAAATGTATGCTCTGATATTTT
+ATCAATGAGCATCAACAAAAACCAAATTTTAGAGAATGGCCATGTTCATAACAATAGAGGAGCAGCAATATTTGAGTTGT
+CTGGAGCTGCTACAAATGTAATTGAGGACGTTGGTTTGTCAGATGTCCAGTCTGACACCAGATTGGGAATGGCAAGTCAA
+GTTTGCCAAGTTGATATGCATGAACCAGAGGGCGGTTTGTTGTCTTTTAAGAACCAAAGATTTAAGGATGCTGAGGTTGC
+TACCAATATAACACATGATTATTGTCATGTATCTCATCTGTTAAAGCATTCTAACGTTCAAGTTCCTAAGTACATTAGTG
+GTAATGGTTCAGCCACTGTTGATCTGAATAGGCAGACTGTAGATAGGAATAACAATTTTAAAGTTTCAACATCTAAT
+>contig_31
+ATGACTAGATCCCGCCCACACATAGAGAAATTAGAAAGTAGAGTGTCTCAGGATGATCTTTTGGAGGGCACTTGTGCAAG
+AACGAGGCCATTAAGCTATGATGAGATAATGCTCGGAAGAAATAACAAGGGAGATGCAGGTAAAGAAGTTGCCAGTGGCT
+CTGGAGTGGCTGATAGTGCATCAGGGCATGGTGACATTGAAAAGATTTCTAATTTTCCAGAATACCATCGGCAAATAAAT
+GAGGTTTCTGAACGTATGGATGTTAGACATACTTCAAATGATTTTCAGAAGGTAAGTTGCCGGAGAAAAGTGGATAGTAA
+GAATGACAATTTAGTCCAGGATAAGGATGAAAAGTATAGGGAAAGTGGTGTCAAGTTAAAGAGTATATGGGAAAAGACTG
+AAGGCAGCAAGAGAGTGAGAGAAGGAGAAAATGAAAGACGACATTACAGCAACAGAAAAAGAGATGGCCATTTAGGTGTG
+GATTCATATAATGGATCAAATAAGAGGCAAGCCAGAGAGTCATATAGAAAGGGTAAAACTTCTGAAAGAGGCAGAGTCAA
+ATCCGAAATAGATAGAAAACAGCTTGTTAACGATGAAGGACAGGTCCACAGGAAAAGGAAAACAGAACGGCGAATGAGCA
+GTGACTCTGAGAAAGAGTACAAAAGAAGAGACGAAAGAAATGTGATGCATACAGAAAGATTAACTAACAGAGGCAGCCAG
+AAGTCTGAAAAAGAAAACAGACGCAAACGTCATAATGAGGAGGATAAAACTAAAAGCTTGAGTACG
+>contig_33
+GAAGAAATTAATGAAGTTGTGGCATTTCTACAAAATCCTCGTGCATTCCAAGAAATGGGAGCTCGGGCACCTCGGGGTGT
+TCTTATTGTTGGTGAGATAGGAACAGGTAAAACATCCCTAGCAATGGCTATAGCAGCGGAAGCGAAGGTTCCTTTAGTTG
+AAGTAAAAGCCCAACAGCTGGAAGCTGGTCTGTTTGTGGGCCAAGGTGCATCAAACGTTAGAGAATTGTTTCAAGCAGCA
+CGGGATCTGGCTCCTGTCATTATATTTGTGGAAGACTTTGACCTCTTTGCTGGTGTCCGCGGGAAGGACCTTCAGACCAA
+AAAGCTGGATCACGAATCTTTCATCAATCAACTTCTGGTGGAGCTTGATGGGTTTGAGAAACAAGATGGGGTAGTTTTGA
+TGGCTACTACTCGGAATCTTAAACAAATCGATGGGGCTTTACTGCGTCCTGGTAGAATGGATCGAATATTTCATCTTCAG
+CGACTAACTCAAGCAGAAAGGAGGAAAATCCTTCTTCTTGCTGCAAAAGAATCAATGGATGAAGAGCTCATTGACTTTGT
+CGATTGGCAAAAGGTTGCTGAAAAGACAGCATTACTTCGCCCTATAGAATTAAAA
+>contig_35
+ATTCACAGCCTTCTCTTTCATCTCGAGATTGTCTACCTTCTGCAGATCGATATGTCGATGCTTTACAGAGGGATATCGAG
+GAAGGATAAACCTAGAGGACGTCAGCACGGGTTGACTCAACAGAAAAGGCAAGAGATAAAGGAAGCTTTTGAACTGTTTG
+ACACCGATGGATCTGGAACTATTGATGCGAAAGAGTTGAACGTAGCAATGAGGGCTCTCGGTTTTGAAATGTCAGAAGAG
+GAAATTACGAGAATGATAGCTGAAGTAGACAAAGACGGGAGTGGGGCGATTGACTTTGACGAGTTTTGTCACATGATGAC
+AGCCAAATTCGGAGAAAGGGACACCAAAGAGGAGCTTACCAAGGCTTTTCAGATTATCGACAAAGATCAA
+>contig_9
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
+GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
+AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
+GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
+ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.cleaned_tgf.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned_tgf.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,49 @@
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_11
+IYGKGIQNMRTLWGGVAPESPVVVVGGGVTPESPVVGGIVLGSPAVLLKSDLLQALPPRLLKSALFQNLPSPLLKSGLLH
+DIPSELLRISRCCCCSRCCSRNSRCGY
+>contig_18
+TEVRRWNKQEDWGRKTWKEAKESTLPKIVGEGIYGVGPILAALSSGRRELYALYVQEGLDLSSNSKKKDKKRFERVLKMV
+EKIGLSKKEVSKHDLNMVVDNRPHQGLLLDASALEMVSIKELDPVSIDGEKCPLWLALDEVT
+>contig_2
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
+VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
+LLNYVRFDD
+>contig_20
+GLRIDAHGYALQDWQRDLFQLDLVLHDEPSPRTRSILLLPADIVHADACHVDALPHYWIRFAYTVAGHGSRSYPYPCAGH
+LHWFRHSYPLHVGLVSLDQLLGS
+>contig_21
+VSIVNQEPVLFSVSIGENIAYGLPDDYVSKVDVIKAAKAANAHEFIIALPQGYDTLVGERGGLLSGGQRQRVAIARALLK
+NAPILILDEATSALDAVSERLVQDALNRLMKGRTTLVIAHRLSTVQNADQIALCSDGKISELGTHSELLEQKGLYASLVG
+TQRLAFE
+>contig_24
+KTRELEIEQILLKSKDLESELESKGSMFIKETEALVAENSKLSQELGAFKSELNDIQMKLNVVSSEKDGTVEELTSARKE
+IEELTQKLASEGQKLQSQISSIMEENNLLN
+>contig_29
+MKKSKLLQNSKDLLSRSFNPAKCKTSLRLAGSRLKLLRNKKEVQLKQMKREIAQLLESGQDQTARIRVEHLIREEKMMAA
+YDLLDIYCELVVARLPVIESQKNCPIDLKEAIASLVFAAPRCGDVPEFLDARKQFSAKYGKDFTTAATELRPQCGVGRML
+VEKLSATAPDVQSKTKILNAIAEEHNVKWEPTSFGENDSAPLNDRLTGPSSFTKEREQYPEPPHFEAVQVQAHPSNNTLH
+SSPSNSSQQDDRTSVFVETSTFKTPS
+>contig_3
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGT
+IESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
+>contig_30
+KASGLFSSKDSVDSTDSHMENVCSDILSMSINKNQILENGHVHNNRGAAIFELSGAATNVIEDVGLSDVQSDTRLGMASQ
+VCQVDMHEPEGGLLSFKNQRFKDAEVATNITHDYCHVSHLLKHSNVQVPKYISGNGSATVDLNRQTVDRNNNFKVSTSN
+>contig_31
+MTRSRPHIEKLESRVSQDDLLEGTCARTRPLSYDEIMLGRNNKGDAGKEVASGSGVADSASGHGDIEKISNFPEYHRQIN
+EVSERMDVRHTSNDFQKVSCRRKVDSKNDNLVQDKDEKYRESGVKLKSIWEKTEGSKRVREGENERRHYSNRKRDGHLGV
+DSYNGSNKRQARESYRKGKTSERGRVKSEIDRKQLVNDEGQVHRKRKTERRMSSDSEKEYKRRDERNVMHTERLTNRGSQ
+KSEKENRRKRHNEEDKTKSLST
+>contig_33
+EEINEVVAFLQNPRAFQEMGARAPRGVLIVGEIGTGKTSLAMAIAAEAKVPLVEVKAQQLEAGLFVGQGASNVRELFQAA
+RDLAPVIIFVEDFDLFAGVRGKDLQTKKLDHESFINQLLVELDGFEKQDGVVLMATTRNLKQIDGALLRPGRMDRIFHLQ
+RLTQAERRKILLLAAKESMDEELIDFVDWQKVAEKTALLRPIELK
+>contig_35
+IHSLLFHLEIVYLLQIDMSMLYRGISRKDKPRGRQHGLTQQKRQEIKEAFELFDTDGSGTIDAKELNVAMRALGFEMSEE
+EITRMIAEVDKDGSGAIDFDEFCHMMTAKFGERDTKEELTKAFQIIDKDQ
+>contig_9
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
+AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,8 @@
+>Gene.1::contig_1::g.1::m.1 type:internal len:115 gc:universal contig_1:344-3(-)
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>Gene.2::contig_2::g.2::m.2 type:5prime_partial len:170 gc:universal contig_2:2-511(+)
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD*
+>Gene.3::contig_3::g.3::m.3 type:5prime_partial len:126 gc:universal contig_3:463-86(-)
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN*
+>Gene.4::contig_9::g.4::m.4 type:internal len:132 gc:universal contig_9:2-394(+)
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts2.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts2.cds Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,54 @@
+>contig_1; 91 1 346  minus strand
+XTTAAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTAC
+ACGAGTGGAAAGGTGCTAACAATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGAT
+GCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATACCGGCATACAGTGCAGT
+CTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTA
+GCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAA
+CAAGCAAGATCGGCCATAATTGGTCATGTTGTACACATGGTTAATCGX
+>contig_2 218 1 511 
+XXCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCC
+CCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATC
+ATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGG
+CCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTT
+CCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGT
+ACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCT
+TGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGATTAA
+>contig_3; 134 1 379  minus strand
+XXTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCT
+GAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCAT
+TCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTCAGGCAAAAATACATG
+GATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAAC
+CATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCA
+CCATTTTCACAGATCAACTAG
+>contig_5; -67 1 418  minus strand
+XXCGGTGGTCCGCCACAAACACACGTCAAGCGGGATCCCGCATCCCGCGGGCTCTTCCAC
+GCGGTCGTCCCGGCGCTCGGCCCTCTCCGTGACGCACGTCGAGAGGGACGATTTGGCCGA
+TGCCGCGTGATGCCAGGCCCCGACATCATgCAAGGAGCACTCCGTTGTGCCGACCCTCGC
+CTTCGATGACGTCCTCGGGCGTCTAGCGAAGTATCGAAGAAGGGCGAGCTAAGGAGCCAT
+GAATCCTGGAGATGCGAGCCAGGTCACGAGAGGCGCGGCAGGCGAGTCTTTGCTCGCTCT
+CGCACCGTCTGCTCTCGTGCTTGGAGAAGAAGGACCGACTGCTGACGTCGAACCGGATGA
+TGGAAGGTTCGAACAAGTCAGATCTGGAAAGGGGTAGCATGGAAAAGGCGACACTCTCAC
+X
+>contig_7 103 1 296 
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT
+AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG
+TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC
+AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGcG
+AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGXX
+>contig_9 34 1 396 
+XXACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATT
+TTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGC
+CCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGT
+AGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGA
+ACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATT
+AAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAG
+GCACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGAX
+>contig_10; 78 87 343  minus strand
+ATGGCAGAAGAGAACACCACTACAATGAACCTCGATCTCAATTTGGGCCCCATCAATAAC
+TCAAGCGACGATAGCGAACCTTCATCACGCCCTTATACTGATGTCGCAATGAACTTGGAA
+GATTGGTTAGATAGTCCCGTCCGAGTTCGTGAAGTCGTCCGCCACAGAAATCATAGGTGG
+CGCTCTTTGTGGCGCCAAATCCCAATTCCGCCTGATACGCGAAACCTCGCGCTCGAATTA
+ATCGGCGGCAATGCCCCX
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts2.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts2.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,25 @@
+>contig_1; 91 1 346  minus strand; translated
+XKKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCS
+LRQLLEYGYFHADPHPGNLLATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVNR
+>contig_2 218 1 511 ; translated
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMG
+PDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAG
+TTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD
+>contig_3; 134 1 379  minus strand; translated
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYM
+DYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRA
+PFSQIN
+>contig_5; -67 1 418  minus strand; translated
+XGGPPQTHVKRDPASRGLFHAVVPALGPLRDARREGRFGRCRVMPGPDIIKEHSVVPTLA
+FDDVLGRLAKYRRRASXGAMNPGDASQVTRGAAGESLLALAPSALVLGEEGPTADVEPDD
+GRFEQVRSGKGXHGKGDTLT
+>contig_7 103 1 296 ; translated
+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCS
+NSNKRNSIESSSANFRPENGNESGEISGKKNTRKGKGDX
+>contig_9 34 1 396 ; translated
+XLRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEG
+RIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDE
+APMTQYFVFEAVX
+>contig_10; 78 87 343  minus strand; translated
+MAEENTTTMNLDLNLGPINNSSDDSEPSSRPYTDVAMNLEDWLDSPVRVREVVRHRNHRW
+RSLWRQIPIPPDTRNLALELIGGNAP
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts_tgf.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts_tgf.cds Thu Jun 08 12:44:09 2017 -0400
b
b'@@ -0,0 +1,34 @@\n+>Gene.1::contig_1::g.1::m.1 type:internal len:115 contig_1:344-3(-)\n+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGGTCATGTTGTACACATGGTTAAT\n+>Gene.2::contig_2::g.2::m.2 type:5prime_partial len:170 contig_2:2-511(+)\n+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAACTGTTAAATTATGTGCGCTTTGATGATTAA\n+>Gene.3::contig_3::g.3::m.3 type:5prime_partial len:126 contig_3:463-86(-)\n+GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAACTAG\n+>Gene.4::contig_9::g.4::m.4 type:internal len:132 contig_9:2-394(+)\n+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT\n+>Gene.5::contig_11::g.5::m.5 type:5prime_partial 
len:108 contig_11:2-325(+)\n+ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGGCGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCCAAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCATGATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCCAGAAACTCCCGCTGTGGCTATTGA\n+>Gene.6::contig_11::g.6::m.6 type:5prime_partial len:100 contig_11:443-144(-)\n+GCTGCTAAAGTCACGGTTGCTCCCGGATCTCGCGTCGCTGCTGCTGGAAGCGGCGTTGCCTCAGAATCTCCGGCGACTGCTGAAGCCGGCGTTGCCCCAGGATCTCCCGCGGTCGACTTCAATAGCCACAGCGGGAGTTTCTGGAGCAACACCGACTGCAGCAGCAACAACGGGAGATTCGGAGCAACTCCGACGGGATATCATGGAGCAACCCCGACTTCAACAGTGGCGACGGGAGATTTTGGAACAACGCCGACTTCAGGAGCCGCGGTGGGAGAGCTTGGAGTAAATCCGACTTAA\n+>Gene.7::contig_18::g.7::m.7 type:internal len:143 contig_18:426-1(-)\n+ACAGAGGTGAGAAGATGGAACAAGCAGGAAGATTGGGGTAGAAAGACATGGAAAGAAGCTAAGGAATCCACATTGCCGAAAATAGTAGGTGAAGGGATCTATGGAGTTGGTCCCATTTTAGCTGCACTCTCATCCGGGCGAAGAGAACTCTACGCGTTGTACGTTCAGGAAGGTTTGGATTTGAGTAGTAACAGTAAGAAGAAGGACAAGAAACGGTTCGAGAGAGTTTTGAAAATGGTGGAAAAGATTGGATTAAGCAAAAAAGAGGTATCCAAACACGACCTCAACATGGTCGTTGATAATAGGCCTCACCAGGGCTTGCTTCTTGATGCTTCGGCACTTGAAATGGTTAGTATAAAGGAATTAGACCCCGTTTCCATTGATGGAGAGAAGTGCCCGCTTTGGTTGGCATTGGATGAGGTTACC\n+>Gene.8::contig_20::g.8::m.8 type:internal len:104 contig_20:1-309(+)\n+GGCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGACGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTCTTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCATTTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGATCA\n+>Gene.9::contig_20::g.9::m.9 type:internal len:103 
contig_20:2-307(+)\n+GCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGACGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTCTTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCATTTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGAT\n+>Gene.10::contig_'..b'CGTTCAAAATGCTGATCAAATTGCTTTATGCTCTGATGGGAAGATTTCAGAACTGGGGACACACTCTGAGTTGTTAGAGCAAAAGGGTCTTTATGCCTCACTGGTTGGCACCCAAAGACTTGCATTCGAGTGA\n+>Gene.12::contig_24::g.12::m.12 type:internal len:111 contig_24:331-2(-)\n+AAAACCCGTGAATTAGAAATCGAACAAATCTTGTTAAAATCCAAGGATTTGGAAAGCGAATTGGAGAGCAAGGGTAGTATGTTTATAAAGGAGACTGAGGCACTTGTTGCAGAAAACTCGAAACTCAGTCAGGAGTTGGGTGCATTTAAATCCGAGCTAAACGATATACAGATGAAATTGAACGTTGTTTCATCTGAGAAAGACGGCACTGTTGAAGAACTAACTAGTGCAAGAAAAGAAATAGAAGAGCTGACTCAGAAGCTTGCTTCTGAAGGACAAAAGCTGCAGTCTCAGATATCTTCTATAATGGAAGAGAACAATTTACTTAAC\n+>Gene.13::contig_29::g.13::m.13 type:3prime_partial len:267 contig_29:52-849(+)\n+ATGAAGAAATCGAAGCTTCTGCAGAATTCGAAGGACTTACTTTCTAGGAGCTTCAATCCTGCTAAATGCAAAACGTCTCTGAGGCTGGCGGGTTCAAGGTTGAAGCTATTGAGAAACAAGAAAGAGGTGCAACTGAAGCAGATGAAGCGGGAAATAGCACAGTTGCTCGAGTCTGGACAGGATCAGACCGCTCGAATTCGAGTCGAACATCTGATTAGGGAAGAGAAGATGATGGCTGCATATGATCTCCTCGATATATACTGCGAGCTTGTTGTTGCACGTCTTCCAGTAATTGAGTCACAAAAGAATTGCCCCATTGACCTGAAAGAAGCAATTGCAAGTTTAGTATTTGCAGCACCAAGATGTGGAGATGTACCCGAGTTTCTTGATGCAAGGAAGCAATTTTCAGCTAAATATGGAAAAGATTTCACTACTGCAGCCACAGAACTTCGTCCACAGTGCGGTGTAGGCCGCATGTTGGTTGAAAAATTATCTGCAACAGCGCCTGATGTACAGAGCAAAACTAAAATCTTGAATGCAATAGCTGAGGAGCACAATGTTAAATGGGAACCCACATCATTTGGAGAGAACGACTCTGCACCTCTCAATGACCGACTGACTGGACCAAGTTCCTTTACGAAGGAGAGAGAACAGTACCCTGAACCTCCTCATTTTGAAGCTGTACAAGTCCAAGCTCACCCGAGCAACAACACCTTGCATAGCTCGCCATCAAATTCTTCCCAGCAGGATGATAGAACCTCAGTTTTTGTTGAAACTTCTACTTTCAAGACACCATCG\n+>Gene.14::contig_30::g.14::m.14 type:internal len:160 
contig_30:1-477(+)\n+AAGGCTAGTGGACTTTTTTCAAGTAAAGATTCTGTTGATTCAACAGATAGCCACATGGAAAATGTATGCTCTGATATTTTATCAATGAGCATCAACAAAAACCAAATTTTAGAGAATGGCCATGTTCATAACAATAGAGGAGCAGCAATATTTGAGTTGTCTGGAGCTGCTACAAATGTAATTGAGGACGTTGGTTTGTCAGATGTCCAGTCTGACACCAGATTGGGAATGGCAAGTCAAGTTTGCCAAGTTGATATGCATGAACCAGAGGGCGGTTTGTTGTCTTTTAAGAACCAAAGATTTAAGGATGCTGAGGTTGCTACCAATATAACACATGATTATTGTCATGTATCTCATCTGTTAAAGCATTCTAACGTTCAAGTTCCTAAGTACATTAGTGGTAATGGTTCAGCCACTGTTGATCTGAATAGGCAGACTGTAGATAGGAATAACAATTTTAAAGTTTCAACATCTAAT\n+>Gene.15::contig_31::g.15::m.15 type:3prime_partial len:263 contig_31:787-2(-)\n+ATGACTAGATCCCGCCCACACATAGAGAAATTAGAAAGTAGAGTGTCTCAGGATGATCTTTTGGAGGGCACTTGTGCAAGAACGAGGCCATTAAGCTATGATGAGATAATGCTCGGAAGAAATAACAAGGGAGATGCAGGTAAAGAAGTTGCCAGTGGCTCTGGAGTGGCTGATAGTGCATCAGGGCATGGTGACATTGAAAAGATTTCTAATTTTCCAGAATACCATCGGCAAATAAATGAGGTTTCTGAACGTATGGATGTTAGACATACTTCAAATGATTTTCAGAAGGTAAGTTGCCGGAGAAAAGTGGATAGTAAGAATGACAATTTAGTCCAGGATAAGGATGAAAAGTATAGGGAAAGTGGTGTCAAGTTAAAGAGTATATGGGAAAAGACTGAAGGCAGCAAGAGAGTGAGAGAAGGAGAAAATGAAAGACGACATTACAGCAACAGAAAAAGAGATGGCCATTTAGGTGTGGATTCATATAATGGATCAAATAAGAGGCAAGCCAGAGAGTCATATAGAAAGGGTAAAACTTCTGAAAGAGGCAGAGTCAAATCCGAAATAGATAGAAAACAGCTTGTTAACGATGAAGGACAGGTCCACAGGAAAAGGAAAACAGAACGGCGAATGAGCAGTGACTCTGAGAAAGAGTACAAAAGAAGAGACGAAAGAAATGTGATGCATACAGAAAGATTAACTAACAGAGGCAGCCAGAAGTCTGAAAAAGAAAACAGACGCAAACGTCATAATGAGGAGGATAAAACTAAAAGCTTGAGTACG\n+>Gene.16::contig_33::g.16::m.16 type:internal len:206 
contig_33:615-1(-)\n+GAAGAAATTAATGAAGTTGTGGCATTTCTACAAAATCCTCGTGCATTCCAAGAAATGGGAGCTCGGGCACCTCGGGGTGTTCTTATTGTTGGTGAGATAGGAACAGGTAAAACATCCCTAGCAATGGCTATAGCAGCGGAAGCGAAGGTTCCTTTAGTTGAAGTAAAAGCCCAACAGCTGGAAGCTGGTCTGTTTGTGGGCCAAGGTGCATCAAACGTTAGAGAATTGTTTCAAGCAGCACGGGATCTGGCTCCTGTCATTATATTTGTGGAAGACTTTGACCTCTTTGCTGGTGTCCGCGGGAAGGACCTTCAGACCAAAAAGCTGGATCACGAATCTTTCATCAATCAACTTCTGGTGGAGCTTGATGGGTTTGAGAAACAAGATGGGGTAGTTTTGATGGCTACTACTCGGAATCTTAAACAAATCGATGGGGCTTTACTGCGTCCTGGTAGAATGGATCGAATATTTCATCTTCAGCGACTAACTCAAGCAGAAAGGAGGAAAATCCTTCTTCTTGCTGCAAAAGAATCAATGGATGAAGAGCTCATTGACTTTGTCGATTGGCAAAAGGTTGCTGAAAAGACAGCATTACTTCGCCCTATAGAATTAAAA\n+>Gene.17::contig_35::g.17::m.17 type:internal len:131 contig_35:2-391(+)\n+ATTCACAGCCTTCTCTTTCATCTCGAGATTGTCTACCTTCTGCAGATCGATATGTCGATGCTTTACAGAGGGATATCGAGGAAGGATAAACCTAGAGGACGTCAGCACGGGTTGACTCAACAGAAAAGGCAAGAGATAAAGGAAGCTTTTGAACTGTTTGACACCGATGGATCTGGAACTATTGATGCGAAAGAGTTGAACGTAGCAATGAGGGCTCTCGGTTTTGAAATGTCAGAAGAGGAAATTACGAGAATGATAGCTGAAGTAGACAAAGACGGGAGTGGGGCGATTGACTTTGACGAGTTTTGTCACATGATGACAGCCAAATTCGGAGAAAGGGACACCAAAGAGGAGCTTACCAAGGCTTTTCAGATTATCGACAAAGATCAA\n'
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts_tgf.cleaned.nr.cds
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts_tgf.cleaned.nr.cds Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,102 @@
+>contig_1
+AAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAACAAT
+GGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGAATA
+CCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTAGCT
+ACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAATTGG
+TCATGTTGTACACATGGTTAAT
+>contig_11
+ATTTATGGAAAGGGGATTCAAAATATGAGAACACTCTGGGGTGGCGTTGCTCCCGAATCTCCCGTCGTTGTTGTTGGAGG
+CGGTGTTACTCCAGAATCTCCCGTCGTCGGAGGCATTGTTCTAGGATCTCCCGCAGTGCTGCTTAAGTCGGATTTACTCC
+AAGCTCTCCCACCGCGGCTCCTGAAGTCGGCGTTGTTCCAAAATCTCCCGTCGCCACTGTTGAAGTCGGGGTTGCTCCAT
+GATATCCCGTCGGAGTTGCTCCGAATCTCCCGTTGTTGCTGCTGCAGTCGGTGTTGCTCCAGAAACTCCCGCTGTGGCTA
+T
+>contig_18
+ACAGAGGTGAGAAGATGGAACAAGCAGGAAGATTGGGGTAGAAAGACATGGAAAGAAGCTAAGGAATCCACATTGCCGAA
+AATAGTAGGTGAAGGGATCTATGGAGTTGGTCCCATTTTAGCTGCACTCTCATCCGGGCGAAGAGAACTCTACGCGTTGT
+ACGTTCAGGAAGGTTTGGATTTGAGTAGTAACAGTAAGAAGAAGGACAAGAAACGGTTCGAGAGAGTTTTGAAAATGGTG
+GAAAAGATTGGATTAAGCAAAAAAGAGGTATCCAAACACGACCTCAACATGGTCGTTGATAATAGGCCTCACCAGGGCTT
+GCTTCTTGATGCTTCGGCACTTGAAATGGTTAGTATAAAGGAATTAGACCCCGTTTCCATTGATGGAGAGAAGTGCCCGC
+TTTGGTTGGCATTGGATGAGGTTACC
+>contig_2
+CTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGACTA
+CGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCA
+TGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATGG
+GTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTCGGA
+GATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGG
+GTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACAA
+CTGTTAAATTATGTGCGCTTTGATGAT
+>contig_20
+GGCTTACGCATCGATGCTCACGGATATGCCCTACAAGATTGGCAACGCGATCTGTTTCAACTTGATCTTGTACTTCATGA
+CGAACCTTCGCCGCGAACCAGGAGCATTCTTCTTCTTCCTGCTGATATCGTTCACGCTGACGCTTGTCATGTCGATGCTC
+TTCCGCACTATTGGATCCGTTTCGCGTACACTGTCGCAGGCCATGGCTCCCGCAGCTATCCTTATCCTTGCGCTGGTCAT
+TTACACTGGTTTCGTCATTCCTACCCGCTACATGTTGGGCTGGTCTCGTTGGATCAACTACTTGGATCA
+>contig_21
+GTTTCCATAGTGAATCAGGAACCTGTTCTATTCTCGGTTTCCATTGGAGAAAATATTGCTTACGGACTCCCAGATGATTA
+TGTTTCCAAGGTCGATGTGATAAAAGCTGCTAAAGCAGCTAATGCTCATGAATTTATCATTGCGTTGCCACAGGGTTATG
+ACACATTAGTTGGTGAGCGTGGTGGGTTGTTAAGTGGAGGACAAAGACAGAGGGTAGCCATTGCAAGGGCTCTGCTCAAG
+AATGCTCCAATCTTGATTCTTGATGAGGCTACCAGTGCTTTGGACGCAGTCAGTGAACGTCTGGTTCAGGATGCTTTGAA
+CCGTTTGATGAAAGGAAGAACGACTTTAGTGATTGCTCACAGGCTGAGCACCGTTCAAAATGCTGATCAAATTGCTTTAT
+GCTCTGATGGGAAGATTTCAGAACTGGGGACACACTCTGAGTTGTTAGAGCAAAAGGGTCTTTATGCCTCACTGGTTGGC
+ACCCAAAGACTTGCATTCGAG
+>contig_24
+AAAACCCGTGAATTAGAAATCGAACAAATCTTGTTAAAATCCAAGGATTTGGAAAGCGAATTGGAGAGCAAGGGTAGTAT
+GTTTATAAAGGAGACTGAGGCACTTGTTGCAGAAAACTCGAAACTCAGTCAGGAGTTGGGTGCATTTAAATCCGAGCTAA
+ACGATATACAGATGAAATTGAACGTTGTTTCATCTGAGAAAGACGGCACTGTTGAAGAACTAACTAGTGCAAGAAAAGAA
+ATAGAAGAGCTGACTCAGAAGCTTGCTTCTGAAGGACAAAAGCTGCAGTCTCAGATATCTTCTATAATGGAAGAGAACAA
+TTTACTTAAC
+>contig_29
+ATGAAGAAATCGAAGCTTCTGCAGAATTCGAAGGACTTACTTTCTAGGAGCTTCAATCCTGCTAAATGCAAAACGTCTCT
+GAGGCTGGCGGGTTCAAGGTTGAAGCTATTGAGAAACAAGAAAGAGGTGCAACTGAAGCAGATGAAGCGGGAAATAGCAC
+AGTTGCTCGAGTCTGGACAGGATCAGACCGCTCGAATTCGAGTCGAACATCTGATTAGGGAAGAGAAGATGATGGCTGCA
+TATGATCTCCTCGATATATACTGCGAGCTTGTTGTTGCACGTCTTCCAGTAATTGAGTCACAAAAGAATTGCCCCATTGA
+CCTGAAAGAAGCAATTGCAAGTTTAGTATTTGCAGCACCAAGATGTGGAGATGTACCCGAGTTTCTTGATGCAAGGAAGC
+AATTTTCAGCTAAATATGGAAAAGATTTCACTACTGCAGCCACAGAACTTCGTCCACAGTGCGGTGTAGGCCGCATGTTG
+GTTGAAAAATTATCTGCAACAGCGCCTGATGTACAGAGCAAAACTAAAATCTTGAATGCAATAGCTGAGGAGCACAATGT
+TAAATGGGAACCCACATCATTTGGAGAGAACGACTCTGCACCTCTCAATGACCGACTGACTGGACCAAGTTCCTTTACGA
+AGGAGAGAGAACAGTACCCTGAACCTCCTCATTTTGAAGCTGTACAAGTCCAAGCTCACCCGAGCAACAACACCTTGCAT
+AGCTCGCCATCAAATTCTTCCCAGCAGGATGATAGAACCTCAGTTTTTGTTGAAACTTCTACTTTCAAGACACCATCG
+>contig_3
+GTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAGAGC
+TCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATACTT
+TCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGCACA
+ATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGTTAC
+AGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
+>contig_30
+AAGGCTAGTGGACTTTTTTCAAGTAAAGATTCTGTTGATTCAACAGATAGCCACATGGAAAATGTATGCTCTGATATTTT
+ATCAATGAGCATCAACAAAAACCAAATTTTAGAGAATGGCCATGTTCATAACAATAGAGGAGCAGCAATATTTGAGTTGT
+CTGGAGCTGCTACAAATGTAATTGAGGACGTTGGTTTGTCAGATGTCCAGTCTGACACCAGATTGGGAATGGCAAGTCAA
+GTTTGCCAAGTTGATATGCATGAACCAGAGGGCGGTTTGTTGTCTTTTAAGAACCAAAGATTTAAGGATGCTGAGGTTGC
+TACCAATATAACACATGATTATTGTCATGTATCTCATCTGTTAAAGCATTCTAACGTTCAAGTTCCTAAGTACATTAGTG
+GTAATGGTTCAGCCACTGTTGATCTGAATAGGCAGACTGTAGATAGGAATAACAATTTTAAAGTTTCAACATCTAAT
+>contig_31
+ATGACTAGATCCCGCCCACACATAGAGAAATTAGAAAGTAGAGTGTCTCAGGATGATCTTTTGGAGGGCACTTGTGCAAG
+AACGAGGCCATTAAGCTATGATGAGATAATGCTCGGAAGAAATAACAAGGGAGATGCAGGTAAAGAAGTTGCCAGTGGCT
+CTGGAGTGGCTGATAGTGCATCAGGGCATGGTGACATTGAAAAGATTTCTAATTTTCCAGAATACCATCGGCAAATAAAT
+GAGGTTTCTGAACGTATGGATGTTAGACATACTTCAAATGATTTTCAGAAGGTAAGTTGCCGGAGAAAAGTGGATAGTAA
+GAATGACAATTTAGTCCAGGATAAGGATGAAAAGTATAGGGAAAGTGGTGTCAAGTTAAAGAGTATATGGGAAAAGACTG
+AAGGCAGCAAGAGAGTGAGAGAAGGAGAAAATGAAAGACGACATTACAGCAACAGAAAAAGAGATGGCCATTTAGGTGTG
+GATTCATATAATGGATCAAATAAGAGGCAAGCCAGAGAGTCATATAGAAAGGGTAAAACTTCTGAAAGAGGCAGAGTCAA
+ATCCGAAATAGATAGAAAACAGCTTGTTAACGATGAAGGACAGGTCCACAGGAAAAGGAAAACAGAACGGCGAATGAGCA
+GTGACTCTGAGAAAGAGTACAAAAGAAGAGACGAAAGAAATGTGATGCATACAGAAAGATTAACTAACAGAGGCAGCCAG
+AAGTCTGAAAAAGAAAACAGACGCAAACGTCATAATGAGGAGGATAAAACTAAAAGCTTGAGTACG
+>contig_33
+GAAGAAATTAATGAAGTTGTGGCATTTCTACAAAATCCTCGTGCATTCCAAGAAATGGGAGCTCGGGCACCTCGGGGTGT
+TCTTATTGTTGGTGAGATAGGAACAGGTAAAACATCCCTAGCAATGGCTATAGCAGCGGAAGCGAAGGTTCCTTTAGTTG
+AAGTAAAAGCCCAACAGCTGGAAGCTGGTCTGTTTGTGGGCCAAGGTGCATCAAACGTTAGAGAATTGTTTCAAGCAGCA
+CGGGATCTGGCTCCTGTCATTATATTTGTGGAAGACTTTGACCTCTTTGCTGGTGTCCGCGGGAAGGACCTTCAGACCAA
+AAAGCTGGATCACGAATCTTTCATCAATCAACTTCTGGTGGAGCTTGATGGGTTTGAGAAACAAGATGGGGTAGTTTTGA
+TGGCTACTACTCGGAATCTTAAACAAATCGATGGGGCTTTACTGCGTCCTGGTAGAATGGATCGAATATTTCATCTTCAG
+CGACTAACTCAAGCAGAAAGGAGGAAAATCCTTCTTCTTGCTGCAAAAGAATCAATGGATGAAGAGCTCATTGACTTTGT
+CGATTGGCAAAAGGTTGCTGAAAAGACAGCATTACTTCGCCCTATAGAATTAAAA
+>contig_35
+ATTCACAGCCTTCTCTTTCATCTCGAGATTGTCTACCTTCTGCAGATCGATATGTCGATGCTTTACAGAGGGATATCGAG
+GAAGGATAAACCTAGAGGACGTCAGCACGGGTTGACTCAACAGAAAAGGCAAGAGATAAAGGAAGCTTTTGAACTGTTTG
+ACACCGATGGATCTGGAACTATTGATGCGAAAGAGTTGAACGTAGCAATGAGGGCTCTCGGTTTTGAAATGTCAGAAGAG
+GAAATTACGAGAATGATAGCTGAAGTAGACAAAGACGGGAGTGGGGCGATTGACTTTGACGAGTTTTGTCACATGATGAC
+AGCCAAATTCGGAGAAAGGGACACCAAAGAGGAGCTTACCAAGGCTTTTCAGATTATCGACAAAGATCAA
+>contig_9
+CTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCATGT
+GGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAA
+AACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAACA
+GCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGCTGA
+ACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTT
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts_tgf.cleaned.nr.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts_tgf.cleaned.nr.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,49 @@
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>contig_11
+IYGKGIQNMRTLWGGVAPESPVVVVGGGVTPESPVVGGIVLGSPAVLLKSDLLQALPPRLLKSALFQNLPSPLLKSGLLH
+DIPSELLRISRCCCCSRCCSRNSRCGY
+>contig_18
+TEVRRWNKQEDWGRKTWKEAKESTLPKIVGEGIYGVGPILAALSSGRRELYALYVQEGLDLSSNSKKKDKKRFERVLKMV
+EKIGLSKKEVSKHDLNMVVDNRPHQGLLLDASALEMVSIKELDPVSIDGEKCPLWLALDEVT
+>contig_2
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
+VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
+LLNYVRFDD
+>contig_20
+GLRIDAHGYALQDWQRDLFQLDLVLHDEPSPRTRSILLLPADIVHADACHVDALPHYWIRFAYTVAGHGSRSYPYPCAGH
+LHWFRHSYPLHVGLVSLDQLLGS
+>contig_21
+VSIVNQEPVLFSVSIGENIAYGLPDDYVSKVDVIKAAKAANAHEFIIALPQGYDTLVGERGGLLSGGQRQRVAIARALLK
+NAPILILDEATSALDAVSERLVQDALNRLMKGRTTLVIAHRLSTVQNADQIALCSDGKISELGTHSELLEQKGLYASLVG
+TQRLAFE
+>contig_24
+KTRELEIEQILLKSKDLESELESKGSMFIKETEALVAENSKLSQELGAFKSELNDIQMKLNVVSSEKDGTVEELTSARKE
+IEELTQKLASEGQKLQSQISSIMEENNLLN
+>contig_29
+MKKSKLLQNSKDLLSRSFNPAKCKTSLRLAGSRLKLLRNKKEVQLKQMKREIAQLLESGQDQTARIRVEHLIREEKMMAA
+YDLLDIYCELVVARLPVIESQKNCPIDLKEAIASLVFAAPRCGDVPEFLDARKQFSAKYGKDFTTAATELRPQCGVGRML
+VEKLSATAPDVQSKTKILNAIAEEHNVKWEPTSFGENDSAPLNDRLTGPSSFTKEREQYPEPPHFEAVQVQAHPSNNTLH
+SSPSNSSQQDDRTSVFVETSTFKTPS
+>contig_3
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGT
+IESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
+>contig_30
+KASGLFSSKDSVDSTDSHMENVCSDILSMSINKNQILENGHVHNNRGAAIFELSGAATNVIEDVGLSDVQSDTRLGMASQ
+VCQVDMHEPEGGLLSFKNQRFKDAEVATNITHDYCHVSHLLKHSNVQVPKYISGNGSATVDLNRQTVDRNNNFKVSTSN
+>contig_31
+MTRSRPHIEKLESRVSQDDLLEGTCARTRPLSYDEIMLGRNNKGDAGKEVASGSGVADSASGHGDIEKISNFPEYHRQIN
+EVSERMDVRHTSNDFQKVSCRRKVDSKNDNLVQDKDEKYRESGVKLKSIWEKTEGSKRVREGENERRHYSNRKRDGHLGV
+DSYNGSNKRQARESYRKGKTSERGRVKSEIDRKQLVNDEGQVHRKRKTERRMSSDSEKEYKRRDERNVMHTERLTNRGSQ
+KSEKENRRKRHNEEDKTKSLST
+>contig_33
+EEINEVVAFLQNPRAFQEMGARAPRGVLIVGEIGTGKTSLAMAIAAEAKVPLVEVKAQQLEAGLFVGQGASNVRELFQAA
+RDLAPVIIFVEDFDLFAGVRGKDLQTKKLDHESFINQLLVELDGFEKQDGVVLMATTRNLKQIDGALLRPGRMDRIFHLQ
+RLTQAERRKILLLAAKESMDEELIDFVDWQKVAEKTALLRPIELK
+>contig_35
+IHSLLFHLEIVYLLQIDMSMLYRGISRKDKPRGRQHGLTQQKRQEIKEAFELFDTDGSGTIDAKELNVAMRALGFEMSEE
+EITRMIAEVDKDGSGAIDFDEFCHMMTAKFGERDTKEELTKAFQIIDKDQ
+>contig_9
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
+AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
b
diff -r 000000000000 -r fcc558568020 test-data/transcripts_tgf.pep
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts_tgf.pep Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,34 @@
+>Gene.1::contig_1::g.1::m.1 type:internal len:115 gc:universal contig_1:344-3(-)
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
+>Gene.2::contig_2::g.2::m.2 type:5prime_partial len:170 gc:universal contig_2:2-511(+)
+LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKWVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQLLNYVRFDD*
+>Gene.3::contig_3::g.3::m.3 type:5prime_partial len:126 gc:universal contig_3:463-86(-)
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGTIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN*
+>Gene.4::contig_9::g.4::m.4 type:internal len:132 gc:universal contig_9:2-394(+)
+LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRTAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
+>Gene.5::contig_11::g.5::m.5 type:5prime_partial len:108 gc:universal contig_11:2-325(+)
+IYGKGIQNMRTLWGGVAPESPVVVVGGGVTPESPVVGGIVLGSPAVLLKSDLLQALPPRLLKSALFQNLPSPLLKSGLLHDIPSELLRISRCCCCSRCCSRNSRCGY*
+>Gene.6::contig_11::g.6::m.6 type:5prime_partial len:100 gc:universal contig_11:443-144(-)
+AAKVTVAPGSRVAAAGSGVASESPATAEAGVAPGSPAVDFNSHSGSFWSNTDCSSNNGRFGATPTGYHGATPTSTVATGDFGTTPTSGAAVGELGVNPT*
+>Gene.7::contig_18::g.7::m.7 type:internal len:143 gc:universal contig_18:426-1(-)
+TEVRRWNKQEDWGRKTWKEAKESTLPKIVGEGIYGVGPILAALSSGRRELYALYVQEGLDLSSNSKKKDKKRFERVLKMVEKIGLSKKEVSKHDLNMVVDNRPHQGLLLDASALEMVSIKELDPVSIDGEKCPLWLALDEVT
+>Gene.8::contig_20::g.8::m.8 type:internal len:104 gc:universal contig_20:1-309(+)
+GLRIDAHGYALQDWQRDLFQLDLVLHDEPSPRTRSILLLPADIVHADACHVDALPHYWIRFAYTVAGHGSRSYPYPCAGHLHWFRHSYPLHVGLVSLDQLLGS
+>Gene.9::contig_20::g.9::m.9 type:internal len:103 gc:universal contig_20:2-307(+)
+AYASMLTDMPYKIGNAICFNLILYFMTNLRREPGAFFFFLLISFTLTLVMSMLFRTIGSVSRTLSQAMAPAAILILALVIYTGFVIPTRYMLGWSRWINYLD
+>Gene.10::contig_20::g.10::m.10 type:internal len:103 gc:universal contig_20:307-2(-)
+IQVVDPTRPAQHVAGRNDETSVNDQRKDKDSCGSHGLRQCTRNGSNSAEEHRHDKRQRERYQQEEEECSWFAAKVRHEVQDQVETDRVANLVGHIREHRCVS
+>Gene.11::contig_21::g.11::m.11 type:5prime_partial len:168 gc:universal contig_21:519-16(-)
+VSIVNQEPVLFSVSIGENIAYGLPDDYVSKVDVIKAAKAANAHEFIIALPQGYDTLVGERGGLLSGGQRQRVAIARALLKNAPILILDEATSALDAVSERLVQDALNRLMKGRTTLVIAHRLSTVQNADQIALCSDGKISELGTHSELLEQKGLYASLVGTQRLAFE*
+>Gene.12::contig_24::g.12::m.12 type:internal len:111 gc:universal contig_24:331-2(-)
+KTRELEIEQILLKSKDLESELESKGSMFIKETEALVAENSKLSQELGAFKSELNDIQMKLNVVSSEKDGTVEELTSARKEIEELTQKLASEGQKLQSQISSIMEENNLLN
+>Gene.13::contig_29::g.13::m.13 type:3prime_partial len:267 gc:universal contig_29:52-849(+)
+MKKSKLLQNSKDLLSRSFNPAKCKTSLRLAGSRLKLLRNKKEVQLKQMKREIAQLLESGQDQTARIRVEHLIREEKMMAAYDLLDIYCELVVARLPVIESQKNCPIDLKEAIASLVFAAPRCGDVPEFLDARKQFSAKYGKDFTTAATELRPQCGVGRMLVEKLSATAPDVQSKTKILNAIAEEHNVKWEPTSFGENDSAPLNDRLTGPSSFTKEREQYPEPPHFEAVQVQAHPSNNTLHSSPSNSSQQDDRTSVFVETSTFKTPS
+>Gene.14::contig_30::g.14::m.14 type:internal len:160 gc:universal contig_30:1-477(+)
+KASGLFSSKDSVDSTDSHMENVCSDILSMSINKNQILENGHVHNNRGAAIFELSGAATNVIEDVGLSDVQSDTRLGMASQVCQVDMHEPEGGLLSFKNQRFKDAEVATNITHDYCHVSHLLKHSNVQVPKYISGNGSATVDLNRQTVDRNNNFKVSTSN
+>Gene.15::contig_31::g.15::m.15 type:3prime_partial len:263 gc:universal contig_31:787-2(-)
+MTRSRPHIEKLESRVSQDDLLEGTCARTRPLSYDEIMLGRNNKGDAGKEVASGSGVADSASGHGDIEKISNFPEYHRQINEVSERMDVRHTSNDFQKVSCRRKVDSKNDNLVQDKDEKYRESGVKLKSIWEKTEGSKRVREGENERRHYSNRKRDGHLGVDSYNGSNKRQARESYRKGKTSERGRVKSEIDRKQLVNDEGQVHRKRKTERRMSSDSEKEYKRRDERNVMHTERLTNRGSQKSEKENRRKRHNEEDKTKSLST
+>Gene.16::contig_33::g.16::m.16 type:internal len:206 gc:universal contig_33:615-1(-)
+EEINEVVAFLQNPRAFQEMGARAPRGVLIVGEIGTGKTSLAMAIAAEAKVPLVEVKAQQLEAGLFVGQGASNVRELFQAARDLAPVIIFVEDFDLFAGVRGKDLQTKKLDHESFINQLLVELDGFEKQDGVVLMATTRNLKQIDGALLRPGRMDRIFHLQRLTQAERRKILLLAAKESMDEELIDFVDWQKVAEKTALLRPIELK
+>Gene.17::contig_35::g.17::m.17 type:internal len:131 gc:universal contig_35:2-391(+)
+IHSLLFHLEIVYLLQIDMSMLYRGISRKDKPRGRQHGLTQQKRQEIKEAFELFDTDGSGTIDAKELNVAMRALGFEMSEEEITRMIAEVDKDGSGAIDFDEFCHMMTAKFGERDTKEELTKAFQIIDKDQ
b
diff -r 000000000000 -r fcc558568020 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r fcc558568020 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Thu Jun 08 12:44:09 2017 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="${__HERE__}/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r fcc558568020 utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Thu Jun 08 12:44:09 2017 -0400
[
@@ -0,0 +1,80 @@
+import os
+import shutil
+import subprocess
+import sys
+
+FSTDERR = 'stderr.txt'
+FSTDOUT = 'stdout.txt'
+
+
+def check_execution_errors(rc, fstderr, fstdout):
+    if rc != 0:
+        fh = open(fstdout, 'rb')
+        out_msg = fh.read()
+        fh.close()
+        fh = open(fstderr, 'rb')
+        err_msg = fh.read()
+        fh.close()
+        msg = '%s\n%s\n' % (str(out_msg), str(err_msg))
+        stop_err(msg)
+
+
+def get_response_buffers():
+    fstderr = os.path.join(os.getcwd(), FSTDERR)
+    fherr = open(fstderr, 'wb')
+    fstdout = os.path.join(os.getcwd(), FSTDOUT)
+    fhout = open(fstdout, 'wb')
+    return fstderr, fherr, fstdout, fhout
+
+
+def move_directory_files(source_dir, destination_dir, copy=False):
+    source_directory = os.path.abspath(source_dir)
+    destination_directory = os.path.abspath(destination_dir)
+    if not os.path.isdir(destination_directory):
+        os.makedirs(destination_directory)
+    for dir_entry in os.listdir(source_directory):
+        source_entry = os.path.join(source_directory, dir_entry)
+        if copy:
+            shutil.copy(source_entry, destination_directory)
+        else:
+            shutil.move(source_entry, destination_directory)
+
+
+def run_command(cmd):
+    fstderr, fherr, fstdout, fhout = get_response_buffers()
+    proc = subprocess.Popen(args=cmd, stderr=fherr, stdout=fhout, shell=True)
+    rc = proc.wait()
+    # Check results.
+    fherr.close()
+    fhout.close()
+    check_execution_errors(rc, fstderr, fstdout)
+
+
+def stop_err(msg):
+    sys.exit(msg)
+
+
+def write_html_output(output, title, dir):
+    with open(output, 'w') as fh:
+        dir_items = sorted(os.listdir(dir))
+        # Directories can only contain either files or directories,
+        # but not both.
+        if len(dir_items) > 0:
+            item_path = os.path.join(dir, dir_items[0])
+            if os.path.isdir(item_path):
+                header = 'Directories'
+            else:
+                header = 'Datasets'
+        else:
+            header = ''
+        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><b>%s</th></b>\n' % header)
+        for index, fname in enumerate(dir_items):
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
+        fh.write('</table></body></html>\n')