changeset 0:f8603464bea7 draft

Uploaded
author greg
date Thu, 08 Jun 2017 12:48:23 -0400
parents
children 95cb5d32a3b4
files .shed.yml gene_family_classifier.py gene_family_classifier.xml macros.xml plant_tribes_scaffolds.loc plant_tribes_scaffolds.loc.sample test-data/output.ptorthocs test-data/proteins.blastp.22Gv1.1 test-data/proteins.blastp.22Gv1.1.bestOrthos test-data/proteins.both.22Gv1.1.bestOrthos test-data/proteins.both.22Gv1.1.bestOrthos.summary test-data/proteins.hmmscan.22Gv1.1 test-data/proteins.hmmscan.22Gv1.1.bestOrthos test-data/tool-data/plant_tribes/scaffolds/README.txt test-data/transcripts.cleaned.nr.cds test-data/transcripts.cleaned.nr.pep tool_data_table_conf.xml.sample tool_data_table_conf.xml.test utils.py
diffstat 19 files changed, 901 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,15 @@
+name: plant_tribes_gene_family_classifier
+owner: greg
+description: |
+  Contains a tool that classifies gene sequences into precomputed orthologous gene family clusters using either
+  blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that
+  utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
+  comparative evolutionary studies.  This tool classifies gene sequences into precomputed orthologous gene family
+  clusters using either blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_classifier
+type: unrestricted
+categories:
+- Phylogenetics
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_classifier.py	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+import argparse
+import os
+import shutil
+
+import utils
+
+
+BUFF_SIZE = 1048576  # NOTE(review): not referenced anywhere in this script
+OUTPUT_DIR = 'geneFamilyClassification_dir'  # relative directory under which this script looks for hmmscan.log and the *_fasta result directories
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--input', dest='input', help='Input dataset')
+parser.add_argument('--scaffold', dest='scaffold', help='Orthogroups or gene families proteins scaffold')
+parser.add_argument('--method', dest='method', help='Protein clustering method')
+parser.add_argument('--classifier', dest='classifier', help='Protein classification method')
+parser.add_argument('--config_dir', dest='config_dir', help='Directory containing default configuration files')
+parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of threads to use for execution')
+parser.add_argument('--super_orthogroups', dest='super_orthogroups', default=None, help='Super orthogroups clustering specification')
+parser.add_argument('--single_copy_custom', dest='single_copy_custom', default=None, help='Custom single copy orthogroup configuration')
+parser.add_argument('--single_copy_taxa', dest='single_copy_taxa', type=int, default=0, help='Minimum single copy taxa required in orthogroup')
+parser.add_argument('--taxa_present', dest='taxa_present', type=int, default=0, help='Minimum taxa required in single copy orthogroup')
+parser.add_argument('--orthogroup_fasta', dest='orthogroup_fasta', default=None, help='Flag to create orthogroup sequences')
+parser.add_argument('--coding_sequences', dest='coding_sequences', default=None, help='Flag to create orthogroup coding sequences')
+parser.add_argument('--save_hmmscan_log', dest='save_hmmscan_log', default=None, help='Flag to save the hmmscan log')
+parser.add_argument('--hmmscan_log', dest='hmmscan_log', default=None, help='hmmscan log file')
+parser.add_argument('--output_ptortho', dest='output_ptortho', default=None, help='Output for orthogroups')
+parser.add_argument('--output_ptortho_dir', dest='output_ptortho_dir', default=None, help='output_ptortho.files_path')
+parser.add_argument('--output_ptorthocs', dest='output_ptorthocs', default=None, help='Output for orthogroups with corresponding coding sequences')
+parser.add_argument('--output_ptorthocs_dir', dest='output_ptorthocs_dir', default=None, help='output_ptorthocs.files_path')
+parser.add_argument('--output_ptsco', dest='output_ptsco', default=None, help='Output for single copy orthogroups')
+parser.add_argument('--output_ptsco_dir', dest='output_ptsco_dir', default=None, help='output_ptsco.files_path')
+
+args = parser.parse_args()
+
+# Build the GeneFamilyClassifier command line as a single string.  Optional
+# switches are appended only when supplied (or > 0 for the integer options).
+# NOTE(review): values are interpolated without any shell quoting; whether
+# that is safe depends on utils.run_command, which is not visible here.
+cmd = 'GeneFamilyClassifier'
+cmd += ' --proteins %s' % args.input
+cmd += ' --scaffold %s' % args.scaffold
+cmd += ' --method %s' % args.method
+cmd += ' --classifier %s' % args.classifier
+cmd += ' --config_dir %s' % args.config_dir
+cmd += ' --num_threads %d' % args.num_threads  # NOTE(review): '%d' raises TypeError when --num_threads is omitted (value is None)
+if args.super_orthogroups is not None:
+    cmd += ' --super_orthogroups %s' % args.super_orthogroups
+if args.single_copy_custom is not None:
+    cmd += ' --single_copy_custom %s' % args.single_copy_custom
+if args.single_copy_taxa > 0:
+    cmd += ' --single_copy_taxa %d' % args.single_copy_taxa
+if args.taxa_present > 0:
+    cmd += ' --taxa_present %d' % args.taxa_present
+if args.orthogroup_fasta is None:
+    create_ortho_sequences = False
+else:
+    create_ortho_sequences = True
+    cmd += ' --orthogroup_fasta'  # forwarded as a bare switch; the argument's value itself is not passed on
+if args.coding_sequences is None:
+    create_corresponding_coding_sequences = False
+else:
+    create_corresponding_coding_sequences = True
+    cmd += ' --coding_sequences %s' % args.coding_sequences
+
+# Run the command through the project's utils helper (defined in utils.py, not shown here).
+utils.run_command(cmd)
+
+# Handle hmmscan.log output: if the log exists, move it to --hmmscan_log when
+# --save_hmmscan_log was given, otherwise delete it.
+if args.classifier in ['hmmscan', 'both']:
+    src_hmmscan_log = os.path.join(OUTPUT_DIR, 'hmmscan.log')
+    if os.path.exists(src_hmmscan_log):
+        if args.save_hmmscan_log is None:
+            os.remove(src_hmmscan_log)
+        else:
+            shutil.move(src_hmmscan_log, args.hmmscan_log)
+
+# Handle orthogroups outputs: pick the ptorthocs or ptortho dataset (and its
+# files directory) depending on whether coding sequences were requested, then
+# pass OUTPUT_DIR/orthogroups_fasta to the utils helpers.
+if create_ortho_sequences:
+    if create_corresponding_coding_sequences:
+        out_file = args.output_ptorthocs
+        orthogroups_fasta_dest_dir = args.output_ptorthocs_dir
+        title = 'Orthogroups and corresponding coding sequences files'
+    else:
+        out_file = args.output_ptortho
+        orthogroups_fasta_dest_dir = args.output_ptortho_dir
+        title = 'Orthogroups files'
+    orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta')
+    utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
+    utils.write_html_output(out_file, title, orthogroups_fasta_dest_dir)
+
+# Handle single copy orthogroup outputs (only when --output_ptsco was given), using OUTPUT_DIR/single_copy_fasta as the source.
+if args.output_ptsco is not None:
+    single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta')
+    single_copy_fasta_dest_dir = args.output_ptsco_dir
+    title = 'Single copy orthogroups files'
+    utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
+    utils.write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_classifier.xml	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,366 @@
+<tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="@WRAPPER_VERSION@.0">
+    <description>classifies gene sequences into pre-computed orthologous gene family clusters</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements_gene_family_classifier" />
+    <command detect_errors="exit_code"><![CDATA[
+#if str($options_type.options_type_selector) == 'advanced':
+    #set specify_super_orthogroups_cond = $options_type.specify_super_orthogroups_cond
+    #set specify_super_orthogroups = $specify_super_orthogroups_cond.specify_super_orthogroups
+    #set create_orthogroup_cond = $options_type.create_orthogroup_cond
+    #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
+    #set specify_single_copy_cond = $options_type.specify_single_copy_cond
+    #set specify_single_copy = $specify_single_copy_cond.specify_single_copy
+    #if str($specify_super_orthogroups) == 'yes':
+        #set specify_super_orthos = True
+        #set super_orthogroups = $specify_super_orthogroups_cond.super_orthogroups
+    #else:
+        #set specify_super_orthos = False
+    #end if
+    #if str($create_orthogroup) == 'yes':
+        #set create_ortho_sequences = True
+        #set create_corresponding_coding_sequences_cond = $create_orthogroup_cond.create_corresponding_coding_sequences_cond
+        #if str($create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences) == 'yes':
+            #set create_corresponding_coding_sequences = True
+        #else:
+            #set create_corresponding_coding_sequences = False
+        #end if
+    #else:
+        #set create_ortho_sequences = False
+        #set create_corresponding_coding_sequences = False
+    #end if
+    #if str($specify_single_copy) == 'yes':
+        #set single_copy_orthogroup = True
+        #set single_copy_cond = $specify_single_copy_cond.single_copy_cond
+        #set single_copy = $single_copy_cond.single_copy
+    #else:
+        #set single_copy_orthogroup = False
+    #end if
+#else:
+    #set single_copy_orthogroup = False
+    #set create_ortho_sequences = False
+    #set create_corresponding_coding_sequences = False
+#end if
+
+python $__tool_directory__/gene_family_classifier.py
+--input '$input'
+--scaffold '$scaffold.fields.path'
+--method $method
+--classifier $save_hmmscan_log_cond.classifier
+--config_dir '$scaffold.fields.path'
+--num_threads \${GALAXY_SLOTS:-4}
+
+#if str($options_type.options_type_selector) == 'advanced':
+    #if specify_super_orthos:
+        --super_orthogroups $super_orthogroups
+    #end if
+    #if $single_copy_orthogroup:
+        #if str($single_copy) == 'custom':
+            #set single_copy_custom_cond = $single_copy_cond.single_copy_custom_cond
+            #set single_copy_custom = $single_copy_custom_cond.single_copy_custom
+            #if str($single_copy_custom) == 'no':
+                --single_copy_custom 'default'
+            #else:
+                --single_copy_custom '$single_copy_custom_cond.single_copy_custom_config'
+            #end if
+        #else:
+            #if str($single_copy_cond.single_copy_taxa):
+                --single_copy_taxa $single_copy_cond.single_copy_taxa
+            #end if
+            #if str($single_copy_cond.taxa_present):
+                --taxa_present $single_copy_cond.taxa_present
+            #end if
+        #end if
+    #end if
+    #if $create_ortho_sequences:
+        --orthogroup_fasta "true"
+        #if $create_corresponding_coding_sequences:
+            --coding_sequences '$create_corresponding_coding_sequences_cond.coding_sequences'
+        #end if
+    #end if
+#end if
+
+#if (str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both') and str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
+    --save_hmmscan_log 'true'
+    --hmmscan_log '$hmmscan_log'
+#end if
+#if $create_ortho_sequences:
+    #if $create_corresponding_coding_sequences:
+        --output_ptorthocs '$output_ptorthocs'
+        --output_ptorthocs_dir '$output_ptorthocs.files_path'
+    #else:
+        --output_ptortho '$output_ptortho'
+        --output_ptortho_dir '$output_ptortho.files_path'
+    #end if
+#end if
+#if $single_copy_orthogroup:
+    #if $create_ortho_sequences:
+        --output_ptsco '$output_ptsco'
+        --output_ptsco_dir '$output_ptsco.files_path'
+    #end if
+#end if
+    ]]></command>
+    <inputs>
+        <param name="input" format="fasta" type="data" label="Proteins fasta file"/>
+        <param name="scaffold" type="select" label="Gene family scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>
+        </param>
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+        <conditional name="save_hmmscan_log_cond">
+            <param name="classifier" type="select" label="Protein classifier">
+                <option value="blastp" selected="true">blastp</option>
+                <option value="hmmscan">hmmscan</option>
+                <option value="both">Both blastp and hmmscan</option>
+            </param>
+            <when value="blastp" />
+            <when value="hmmscan">
+                <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+            </when>
+            <when value="both">
+                <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+            </when>
+        </conditional>
+        <conditional name="options_type">
+            <param name="options_type_selector" type="select" label="Options configuration">
+                <option value="basic" selected="true">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic" />
+            <when value="advanced">
+                <conditional name="specify_super_orthogroups_cond">
+                    <param name="specify_super_orthogroups" type="select" label="Super orthogroups configuration">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no"/>
+                    <when value="yes">
+                        <param name="super_orthogroups" type="select" label="Clustering distance measure">
+                            <option value="min_evalue" selected="true">minimum e-value</option>
+                            <option value="avg_evalue">average e-value</option>
+                        </param>
+                    </when>
+                </conditional>
+                <conditional name="specify_single_copy_cond">
+                    <param name="specify_single_copy" type="select" label="Single copy orthogroups configuration">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no"/>
+                    <when value="yes">
+                        <conditional name="single_copy_cond">
+                            <param name="single_copy" type="select" label="Selection criterion">
+                                <option value="taxa" selected="true">Global selection</option>
+                                <option value="custom">Custom selection</option>
+                            </param>
+                            <when value="custom">
+                                <conditional name="single_copy_custom_cond">
+                                    <param name="single_copy_custom" type="select" label="Custom selection configuration">
+                                        <option value="no" selected="true">No</option>
+                                        <option value="yes">Yes</option>
+                                    </param>
+                                    <when value="no"/>
+                                    <when value="yes">
+                                        <param name="single_copy_custom_config" format="txt" type="data" label="Custom selection file"/>
+                                    </when>
+                                </conditional>
+                            </when>
+                            <when value="taxa">
+                                <param name="single_copy_taxa" type="integer" optional="true" min="0" label="Minimum single copy taxa"/>
+                                <param name="taxa_present" type="integer" optional="true" min="0" label="Minimum taxa present"/>
+                            </when>
+                        </conditional>
+                    </when>
+                </conditional>
+                <conditional name="create_orthogroup_cond">
+                    <param name="create_orthogroup" type="select" label="Orthogroups fasta configuration">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <conditional name="create_corresponding_coding_sequences_cond">
+                            <param name="create_corresponding_coding_sequences" type="select" label="Orthogroups coding sequences">
+                                <option value="no" selected="true">No</option>
+                                <option value="yes">Yes</option>
+                            </param>
+                            <when value="no" />
+                            <when value="yes">
+                                <param name="coding_sequences" format="fasta" type="data" label="Coding sequences fasta file"/>
+                            </when>
+                        </conditional>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="hmmscan_log" format="txt" label="${tool.name} (hmmscan.log) on ${on_string}">
+            <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
+        </data>
+        <data name="output_ptortho" format="ptortho" label="${tool.name} (gene family clusters) on ${on_string}">
+            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'no'</filter>
+        </data>
+        <data name="output_ptorthocs" format="ptorthocs" label="${tool.name} (gene family clusters) on ${on_string}">
+            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'yes'</filter>
+        </data>
+        <data name="output_ptsco" format="tabular" label="${tool.name} (single copy orthogroups) on ${on_string}">
+            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'yes'</filter>
+            <change_format>
+                <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="no" format="ptortho" />
+                <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="yes" format="ptorthocs" />
+            </change_format>
+        </data>
+        <collection name="orthos" type="list">
+            <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta"/>
+            <param name="scaffold" value="22Gv1.1"/>
+            <param name="method" value="orthomcl"/>
+            <param name="classifier" value="both"/>
+            <param name="options_type_selector" value="advanced"/>
+            <param name="create_orthogroup" value="yes"/>
+            <param name="create_corresponding_coding_sequences" value="yes"/>
+            <output name="output_ptorthocs" file="output.ptorthocs" ftype="ptorthocs"/>
+            <output_collection name="orthos" type="list">
+                <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
+                <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
+                <element name="proteins.both.22Gv1.1.bestOrthos" file="proteins.both.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
+                <element name="proteins.both.22Gv1.1.bestOrthos.summary" file="proteins.both.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/>
+                <element name="proteins.hmmscan.22Gv1.1" file="proteins.hmmscan.22Gv1.1" ftype="tabular" compare="contains"/>
+                <element name="proteins.hmmscan.22Gv1.1.bestOrthos" file="proteins.hmmscan.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
+analyses of genome-scale gene families and transcriptomes. This tool classifies gene coding sequences either produced by
+the AssemblyPostProcessor tool or from an external source into pre-computed orthologous gene family clusters (orthogroups)
+of a PlantTribes scaffold.  Classified sequences are then assigned with the corresponding orthogroups’ metadata that includes
+gene counts of backbone taxa, super clusters (super orthogroups) at multiple stringencies, and functional annotations from
+sources such as Gene Ontology (GO), InterPro protein domains, TAIR, UniProtKB/TrEMBL, and UniProtKB/Swiss-Prot.  Additionally,
+sequences belonging to single/low-copy gene families that are mainly utilized in species tree inference can be determined.
+
+-----
+
+**Required options**
+
+ * **Proteins fasta file** - proteins fasta file either produced by the AssemblyPostProcessor tool or an external source selected from your history.
+ * **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffolds Download Data Manager tool.
+ * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
+ * **Protein classifier** - classifier used to assign protein sequences to the orthogroups of the specified scaffold. PlantTribes implements three classification approaches: blastp (faster)[5], hmmscan (slower but more sensitive assignment of divergent homologs)[6], and both blastp and hmmscan (disagreements resolved in favor of hmmscan; more exhaustive).
+
+**Other options**
+
+ * **Super orthogroups configuration** - select ‘Yes’ to enable super orthogroups configuration options.  Super orthogroups[7] are constructed through a second iteration of MCL clustering to connect distant, but potentially related orthogroup clusters.
+
+   * **Clustering distance measure** - distance measure used in merging orthogroup clusters into super orthogroup clusters.  PlantTribes pre-computed super orthogroups are based on the minimum and average blastp e-value between all pairs of scaffold orthogroups used as the input matrix for MCL clustering algorithm[8].
+
+ * **Single copy orthogroups configuration** - select ‘Yes’ to enable single/low-copy orthogroups selection configuration options.
+
+   * **Selection criterion** - single/low-copy orthogroups selection criterion. PlantTribes provides custom and global selection criteria for selecting user-defined single/low-copy scaffold orthogroups.
+
+     * **Global selection configuration** - the upper limit values of the following two parameters vary depending on the selected gene family scaffold, and the tool will produce an error if the value exceeds the number of species in the circumscribed scaffold.
+
+       * **Minimum single copy taxa** - minimum number of taxa with single copy genes in the orthogroup.
+       * **Minimum taxa present** - minimum number of taxa present in the orthogroup.
+
+     * **Custom selection configuration** - select ‘Yes’ to enable selection of a single copy configuration file.  Scaffold configuration templates (.singleCopy.config) of how to customize single/low-copy orthogroups selection can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and also available at the PlantTribes GitHub `repository`_.  Single/low-copy settings shown in these templates are used as defaults if ‘No’ is selected.
+
+       * **Custom selection file** - select a single/low-copy customized configuration file from your history.
+
+ * **Orthogroups fasta configuration** - select ‘Yes’ to create proteins orthogroups fasta files for the classified sequences.
+
+   * **Orthogroups coding sequences** - select ‘Yes’ to create corresponding coding sequences orthogroup fasta files for the classified protein sequences. Requires coding sequences fasta file corresponding to the proteins fasta file to be selected from your history.
+
+     * **Coding sequences fasta file** - select coding sequences fasta file corresponding to the proteins fasta file from your history.
+
+.. _repository: https://github.com/dePamphilis/PlantTribes/tree/master/config
+
+    </help>
+    <citations>
+        <expand macro="citation1" />
+        <citation type="bibtex">
+            @article{Sasidharan2012,
+            journal = {Nucleic Acids Research},
+            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+            title = {GFam: a platform for automatic annotation of gene families},
+            year = {2012},
+            pages = {gks631},}
+        </citation>
+        <citation type="bibtex">
+            @article{Li2003,
+            journal = {Genome Research},
+            author = {3. Li L, Stoeckert CJ, Roos DS},
+            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+            year = {2003},
+            volume = {13},
+            number = {9},
+            pages = {2178-2189},}
+        </citation>
+        <citation type="bibtex">
+            @article{Emms2015,
+            journal = {Genome Biology},
+            author = {4. Emms DM, Kelly S},
+            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+            year = {2015},
+            volume = {16},
+            number = {1},
+            pages = {157},}
+        </citation>
+        <citation type="bibtex">
+            @article{Altschul1990,
+            journal = {Journal of molecular biology},
+            author = {5. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
+            title = {Basic local alignment search tool},
+            year = {1990},
+            volume = {215},
+            number = {3},
+            pages = {403-410},}
+        </citation>
+        <citation type="bibtex">
+            @article{Eddy2009,
+            journal = {Genome Inform},
+            author = {6. Eddy SR},
+            title = {A new generation of homology search tools based on probabilistic inference},
+            year = {2009},
+            volume = {23},
+            number = {1},
+            pages = {205-211},}
+        </citation>
+        <citation type="bibtex">
+            @article{Wall2008,
+            journal = {Nucleic Acids Research},
+            author = {7. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
+            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
+            year = {2008},
+            volume = {36},
+            number = {suppl 1},
+            pages = {D970-D976},}
+        </citation>
+        <citation type="bibtex">
+            @article{Enright2002,
+            journal = {Nucleic acids research},
+            author = {8. Enright AJ, Van Dongen S, Ouzounis CA},
+            title = {An efficient algorithm for large-scale detection of protein families},
+            year = {2002},
+            volume = {30},
+            number = {7},
+            pages = {1575-1584},}
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,130 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="requirements_assembly_post_processor">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_assembly_post_processor</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_aligner">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_aligner</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_classifier">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_classifier</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_integrator">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_integrator</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_kaks_analysis">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_kaks_analysis</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_ks_distribution">
+        <requirements>
+            <requirement type="package" version="1.3.0">r-optparse</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_phylogeny_builder">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_phylogeny_builder</requirement>
+        </requirements>
+    </xml>
+    <xml name="param_codon_alignments">
+        <param name="codon_alignments" type="select" label="Codon alignments">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <xml name="param_options_type">
+        <param name="options_type" type="select" label="Options Configuration">
+            <option value="basic" selected="true">Basic</option>
+            <option value="advanced">Advanced</option>
+        </param>
+    </xml>
+    <xml name="param_orthogroup_fna">
+        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Gene family scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <xml name="param_sequence_type">
+        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
+            <option value="protein" selected="true">Amino acid based</option>
+            <option value="dna">Nucleotide based</option>
+        </param>
+    </xml>
+    <xml name="cond_alignment_method">
+        <conditional name="alignment_method_cond">
+            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
+                <option value="mafft" selected="true">MAFFT</option>
+                <option value="pasta">PASTA</option>
+            </param>
+            <when value="mafft" />
+            <when value="pasta">
+                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="cond_remove_gappy_sequences">
+        <conditional name="remove_gappy_sequences_cond">
+            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <conditional name="trim_type_cond">
+                    <param name="trim_type" type="select" label="Trimming method">
+                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
+                        <option value="automated_trimming">Automated heuristic trimming</option>
+                    </param>
+                    <when value="gap_trimming">
+                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
+                    </when>
+                    <when value="automated_trimming" />
+                </conditional>
+                <conditional name="remove_sequences_with_gaps_cond">
+                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
+                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,3 @@
+## Plant Tribes scaffolds
+#Value	Name	Path	Description
+22Gv1.1	22Gv1.1	${__HERE__}/test-data/tool-data/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value	Name	Path	Description
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.ptorthocs	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,28 @@
+<html><head><h3>Orthogroups and corresponding coding sequences files: 12 items</h3></head>
+<body><p/><table cellpadding="2">
+<tr><b>Datasets</th></b>
+<tr bgcolor="#D8D8D8"><td><a href="20.faa" type="text/plain">20.faa</a>
+</td></tr>
+<tr bgcolor="#FFFFFF"><td><a href="20.fna" type="text/plain">20.fna</a>
+</td></tr>
+<tr bgcolor="#D8D8D8"><td><a href="3494.faa" type="text/plain">3494.faa</a>
+</td></tr>
+<tr bgcolor="#FFFFFF"><td><a href="3494.fna" type="text/plain">3494.fna</a>
+</td></tr>
+<tr bgcolor="#D8D8D8"><td><a href="3722.faa" type="text/plain">3722.faa</a>
+</td></tr>
+<tr bgcolor="#FFFFFF"><td><a href="3722.fna" type="text/plain">3722.fna</a>
+</td></tr>
+<tr bgcolor="#D8D8D8"><td><a href="38889.faa" type="text/plain">38889.faa</a>
+</td></tr>
+<tr bgcolor="#FFFFFF"><td><a href="38889.fna" type="text/plain">38889.fna</a>
+</td></tr>
+<tr bgcolor="#D8D8D8"><td><a href="39614.faa" type="text/plain">39614.faa</a>
+</td></tr>
+<tr bgcolor="#FFFFFF"><td><a href="39614.fna" type="text/plain">39614.fna</a>
+</td></tr>
+<tr bgcolor="#D8D8D8"><td><a href="5235.faa" type="text/plain">5235.faa</a>
+</td></tr>
+<tr bgcolor="#FFFFFF"><td><a href="5235.fna" type="text/plain">5235.fna</a>
+</td></tr>
+</table></body></html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.blastp.22Gv1.1	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,50 @@
+contig_1	gnl|Soltu3.4|PGSC0003DMP400044471	92.17	115	9	0	2	116	15	129	1e-73	231
+contig_1	gnl|Mimgu1.0|PACid:17670850	93.04	115	8	0	2	116	307	421	3e-72	234
+contig_1	gnl|Soltu3.4|PGSC0003DMP400044470	92.17	115	9	0	2	116	313	427	1e-71	233
+contig_1	gnl|Soltu3.4|PGSC0003DMP400044472	92.17	115	9	0	2	116	313	427	1e-71	233
+contig_1	gnl|Solly2.3|Solyc04g083010.2.1	92.17	115	9	0	2	116	313	427	1e-71	232
+contig_1	gnl|Nelnu1.0|NNU_016098-RA	90.43	115	11	0	2	116	315	429	5e-71	230
+contig_1	gnl|Poptr2.2|PACid:18246737	91.30	115	10	0	2	116	301	415	6e-71	230
+contig_1	gnl|Glyma1.01|PACid:16244092	90.43	115	11	0	2	116	298	412	2e-70	229
+contig_1	gnl|Glyma1.01|PACid:16244091	90.43	115	11	0	2	116	298	412	2e-70	229
+contig_1	gnl|Carpa1.181|PACid:16417175	90.43	115	11	0	2	116	289	403	2e-70	229
+contig_1	gnl|Vitvi12X|PACid:17826505	90.43	115	11	0	2	116	229	343	2e-70	228
+contig_1	gnl|Theca1.0|Tc10_g016990	90.43	115	11	0	2	116	288	402	7e-70	227
+contig_1	gnl|Phoda3.0|PDK_30s1127391g001	88.70	115	13	0	2	116	146	260	1e-69	224
+contig_1	gnl|Glyma1.01|PACid:16251026	87.83	115	14	0	2	116	127	241	1e-69	223
+contig_1	gnl|Orysa6.0|PACid:16860403	86.96	115	15	0	2	116	325	439	6e-69	221
+contig_1	gnl|Thepa2.0|Tp5g34670	89.57	115	12	0	2	116	310	424	1e-68	224
+contig_1	gnl|Aquco1.0|PACid:18145344	88.70	115	13	0	2	116	320	434	1e-68	224
+contig_1	gnl|Bradi1.2|Bradi2g30567.2	88.70	115	13	0	2	116	300	414	1e-68	224
+contig_1	gnl|Arath10|AT1G79600.1	89.57	115	12	0	2	116	308	422	2e-68	224
+contig_1	gnl|Glyma1.01|PACid:16251025	87.83	115	14	0	2	116	361	475	6e-68	223
+contig_1	gnl|Medtr3.5|Medtr4g026450.1	86.09	115	16	0	2	116	313	427	6e-68	223
+contig_1	gnl|Glyma1.01|PACid:16245030	86.96	115	15	0	2	116	326	440	1e-67	222
+contig_1	gnl|Glyma1.01|PACid:16245029	86.96	115	15	0	2	116	326	440	1e-67	222
+contig_1	gnl|Orysa6.0|PACid:16860404	86.96	115	15	0	2	116	325	439	2e-67	221
+contig_1	gnl|Sorbi1.4|PACid:1980340	86.09	115	16	0	2	116	312	426	2e-67	221
+contig_1	gnl|Ambtr1.0.27|AmTr_v1.0_scaffold00022.11	85.22	115	17	0	2	116	310	424	7e-67	219
+contig_1	gnl|Frave2.0|gene29299	89.57	115	12	0	2	116	711	825	2e-66	221
+contig_1	gnl|Musac1.0|GSMUA_Achr10T01800_001	82.61	115	20	0	2	116	229	343	4e-66	216
+contig_1	gnl|Musac1.0|GSMUA_Achr8T21380_001	81.74	115	21	0	2	116	229	343	6e-65	210
+contig_1	gnl|Phypa1.6|PACid:18072969	81.74	115	21	0	2	116	498	612	1e-62	209
+contig_1	gnl|Selmo1.0|PACid:15405864	78.07	114	25	0	3	116	228	341	4e-59	197
+contig_1	gnl|Poptr2.2|PACid:18214805	54.70	117	49	2	4	116	279	395	3e-36	135
+contig_1	gnl|Selmo1.0|PACid:15417058	56.60	106	46	0	11	116	216	321	3e-36	134
+contig_1	gnl|Glyma1.01|PACid:16255045	51.85	108	52	0	9	116	307	414	9e-36	134
+contig_1	gnl|Vitvi12X|PACid:17841082	51.79	112	54	0	5	116	337	448	1e-35	134
+contig_1	gnl|Aquco1.0|PACid:18159073	53.57	112	51	1	6	116	165	276	2e-35	131
+contig_1	gnl|Nelnu1.0|NNU_020249-RA	53.77	106	49	0	11	116	45	150	3e-35	130
+contig_1	gnl|Solly2.3|Solyc08g068920.2.1	53.77	106	49	0	11	116	323	428	5e-35	132
+contig_1	gnl|Medtr3.5|Medtr3g105760.1	51.85	108	52	0	9	116	309	416	5e-35	132
+contig_1	gnl|Phypa1.6|PACid:18069401	50.43	117	56	1	2	116	331	447	1e-34	131
+contig_1	gnl|Aquco1.0|PACid:18141086	53.91	115	45	1	2	116	17	123	3e-34	127
+contig_1	gnl|Arath10|AT5G24970.2	50.93	108	53	0	9	116	363	470	3e-34	130
+contig_1	gnl|Thepa2.0|Tp2g22500	51.85	108	52	0	9	116	326	433	3e-34	129
+contig_1	gnl|Mimgu1.0|PACid:17681633	52.83	106	50	0	11	116	191	296	3e-34	128
+contig_1	gnl|Ambtr1.0.27|AmTr_v1.0_scaffold00019.389	54.29	105	48	0	11	115	330	434	4e-34	129
+contig_1	gnl|Musac1.0|GSMUA_Achr1T23540_001	51.38	109	53	0	8	116	313	421	5e-34	129
+contig_1	gnl|Phypa1.6|PACid:18063964	53.04	115	46	1	2	116	312	418	6e-34	129
+contig_1	gnl|Phypa1.6|PACid:18051230	53.04	115	46	1	2	116	312	418	1e-33	128
+contig_1	gnl|Bradi1.2|Bradi1g28540.1	47.75	111	58	0	6	116	299	409	2e-33	127
+contig_1	gnl|Theca1.0|Tc09_g001090	53.77	106	49	0	11	116	332	437	2e-33	127
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.blastp.22Gv1.1.bestOrthos	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,7 @@
+Gene ID	Orthogroup ID
+contig_10	3494
+contig_7	3722
+contig_3	554
+contig_2	38889
+contig_1	5235
+contig_9	20
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.both.22Gv1.1.bestOrthos	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,7 @@
+Gene ID	Orthogroup ID
+contig_1	5235
+contig_10	3494
+contig_2	38889
+contig_3	39614
+contig_7	3722
+contig_9	20
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.both.22Gv1.1.bestOrthos.summary	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,4 @@
+Gene ID	Orthogroup ID	Arabidopsis thaliana	Thellungiella parvula	Carica papaya	Theobroma cacao	Populus trichocarpa	Fragaria vesca	Glycine max	Medicago truncatula	Vitis vinifera	Solanum lycopersicum	Solanum tuberosum	Mimulus guttatus	Nelumbo nucifera	Aquilegia coerulea	Oryza sativa	Brachypodium distachyon	Sorghum bicolor	Musa acuminata	Phoenix dactylifera	Amborella trichopoda	Selaginella moellendorffii	Physcomitrella patens	SuperOthogroup I1.2	SuperOthogroup I1.5	SuperOthogroup I1.8	SuperOthogroup I2.0	SuperOthogroup I2.5	SuperOthogroup I3.0	SuperOthogroup I3.5	SuperOthogroup I4.0	SuperOthogroup I4.5	SuperOthogroup I5.0	AHRD Descriptions	TAIR Gene(s) Descriptions	Pfam Domains	InterProScan Descriptions	GO Molecular Functions	GO Biological Processes	GO Cellular Components
+contig_1	5235	1	1	1	1	1	1	6	1	1	1	3	1	1	1	2	1	1	2	1	1	1	1	2	207	198	330	347	338	461	452	558	557	Protein kinase superfamily protein [1.000]	Protein kinase superfamily protein	ABC1 (PF03109) [0.968] | APH (PF01636) [0.161]	UbiB domain (IPR004147) [0.968] | Aminoglycoside phosphotransferase (IPR002575) [0.161]	NULL / Representative annotation below 0.1%	NULL / Representative annotation below 0.1%	NULL / Representative annotation below 0.1%
+contig_10	3494	1	1	1	1	3	1	4	1	0	1	1	1	3	1	3	5	3	3	4	1	0	0	96	177	236	239	529	538	532	524	780	779	RING/U-box superfamily protein [1.000]	RING/U-box superfamily protein	zf-C3HC4_2 (PF13923) [0.949] | zf-RING_5 (PF14634) [0.026]	Zinc finger, RING-type (IPR001841) [0.026]	protein binding (GO:0005515) [0.026] | zinc ion binding (GO:0008270) [0.026]	NULL / Representative annotation below 0.1%	NULL / Representative annotation below 0.1%
+contig_2	38889	0	0	0	0	0	0	0	0	0	1	0	1	0	0	0	0	0	0	0	0	0	0	489	741	746	752	773	800	798	1128	1403	1407	6-phosphogluconolactonase 5 [0.500] | 6-phosphogluconolactonase 2 [0.500]	Unkown protein(s) / No TAIR description(s)	Glucosamine_iso (PF01182) [1.000]	Glucosamine/galactosamine-6-phosphate isomerase (IPR006148) [1.000]	NULL / Representative annotation below 0.1%	carbohydrate metabolic process (GO:0005975) [1.000]	NULL / Representative annotation below 0.1%
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.hmmscan.22Gv1.1	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,25 @@
+#                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
+# target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target
+#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------
+5235                 -          contig_1             -            9.1e-80  270.0   0.0   9.9e-80  269.9   0.0   1.0   1   0   0   1   1   1   1 -
+10465                -          contig_1             -              5e-45  155.1   0.0   5.8e-45  154.9   0.0   1.0   1   0   0   1   1   1   1 -
+9351                 -          contig_1             -            4.7e-41  142.1   0.0   5.2e-41  142.0   0.0   1.0   1   0   0   1   1   1   1 -
+20322                -          contig_1             -              2e-35  123.0   0.0   2.3e-35  122.8   0.0   1.0   1   0   0   1   1   1   1 -
+7049                 -          contig_1             -            3.9e-29  102.4   0.0   4.4e-29  102.3   0.0   1.0   1   0   0   1   1   1   1 -
+5553                 -          contig_1             -            8.8e-29  101.7   0.0     1e-28  101.4   0.0   1.0   1   0   0   1   1   1   1 -
+2922                 -          contig_1             -            7.3e-28   97.8   0.0   8.5e-28   97.6   0.0   1.0   1   0   0   1   1   1   1 -
+8140                 -          contig_1             -              5e-26   92.3   0.2   1.6e-24   87.3   0.2   2.0   1   1   0   1   1   1   1 -
+4060                 -          contig_1             -            1.6e-22   80.8   0.0   1.9e-22   80.6   0.0   1.0   1   0   0   1   1   1   1 -
+8330                 -          contig_1             -            3.7e-17   63.4   0.0   4.2e-17   63.2   0.0   1.0   1   0   0   1   1   1   1 -
+3825                 -          contig_1             -            4.3e-13   50.4   0.0     7e-13   49.7   0.0   1.3   1   1   0   1   1   1   1 -
+4099                 -          contig_1             -            9.7e-13   48.1   0.0   1.1e-12   47.8   0.0   1.0   1   0   0   1   1   1   1 -
+10051                -          contig_1             -            1.1e-10   42.0   0.0   1.4e-10   41.7   0.0   1.0   1   0   0   1   1   1   1 -
+8737                 -          contig_1             -            1.3e-10   41.9   0.0   1.6e-10   41.7   0.0   1.0   1   0   0   1   1   1   1 -
+25529                -          contig_1             -            2.1e-08   34.3   0.0   2.6e-08   34.0   0.0   1.0   1   0   0   1   1   1   1 -
+40021                -          contig_1             -            6.1e-08   32.9   0.0   6.7e-08   32.8   0.0   1.0   1   0   0   1   1   1   1 -
+3494                 -          contig_10            -              1e-06   28.1   0.1   1.1e-06   28.0   0.1   1.0   1   0   0   1   1   1   1 -
+38889                -          contig_2             -            1.1e-87  295.2   0.2   1.4e-87  294.9   0.2   1.0   1   0   0   1   1   1   1 -
+3534                 -          contig_2             -            1.6e-72  245.8   0.1   1.8e-72  245.7   0.1   1.0   1   0   0   1   1   1   1 -
+4875                 -          contig_2             -            1.1e-62  213.8   0.0   1.3e-62  213.6   0.0   1.0   1   0   0   1   1   1   1 -
+37475                -          contig_2             -            6.1e-49  168.3   0.0   7.1e-49  168.1   0.0   1.0   1   0   0   1   1   1   1 -
+28488                -          contig_2             -            1.1e-45  157.4   0.0   1.4e-45  157.0   0.0   1.0   1   0   0   1   1   1   1 -
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.hmmscan.22Gv1.1.bestOrthos	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,7 @@
+Gene ID	Orthogroup ID
+contig_9	20
+contig_1	5235
+contig_2	38889
+contig_10	3494
+contig_7	3722
+contig_3	39614
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tool-data/plant_tribes/scaffolds/README.txt	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,3 @@
+For functional tests to work, this directory must contain symlinks to the scaffolds data
+installed into the Galaxy instance to which planemo points via the --galaxy_root parameter.
+This would typically be something like ~/galaxy/tool-data/plant_tribes/scaffolds/22Gv1.1.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.cds	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,43 @@
+>contig_1
+NTTAAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAAC
+AATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGA
+ATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTA
+GCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAAT
+TGGTCATGTTGTACACATGGTTAATCGN
+>contig_10
+ATGGCAGAAGAGAACACCACTACAATGAACCTCGATCTCAATTTGGGCCCCATCAATAACTCAAGCGACGATAGCGAACC
+TTCATCACGCCCTTATACTGATGTCGCAATGAACTTGGAAGATTGGTTAGATAGTCCCGTCCGAGTTCGTGAAGTCGTCC
+GCCACAGAAATCATAGGTGGCGCTCTTTGTGGCGCCAAATCCCAATTCCGCCTGATACGCGAAACCTCGCGCTCGAATTA
+ATCGGCGGCAATGCCCCN
+>contig_2
+NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA
+CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC
+GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC
+CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGAT
+>contig_3
+NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG
+AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA
+CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT
+TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
+>contig_5
+NNCGGTGGTCCGCCACAAACACACGTCAAGCGGGATCCCGCATCCCGCGGGCTCTTCCACGCGGTCGTCCCGGCGCTCGG
+CCCTCTCCGTGACGCACGTCGAGAGGGACGATTTGGCCGATGCCGCGTGATGCCAGGCCCCGACATCATCAAGGAGCACT
+CCGTTGTGCCGACCCTCGCCTTCGATGACGTCCTCGGGCGTCTAGCGAAGTATCGAAGAAGGGCGAGCGGAGCCATGAAT
+CCTGGAGATGCGAGCCAGGTCACGAGAGGCGCGGCAGGCGAGTCTTTGCTCGCTCTCGCACCGTCTGCTCTCGTGCTTGG
+AGAAGAAGGACCGACTGCTGACGTCGAACCGGATGATGGAAGGTTCGAACAAGTCAGATCTGGAAAGGGGCATGGAAAAG
+GCGACACTCTCACN
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC
+CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT
+CATCCAATGAAACATGTAGCAACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA
+AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN
+>contig_9
+NNACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCA
+TGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCT
+CAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGA
+ACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGC
+TGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGAN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/transcripts.cleaned.nr.pep	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,22 @@
+>contig_1
+XKKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLL
+ATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVNR
+>contig_10
+MAEENTTTMNLDLNLGPINNSSDDSEPSSRPYTDVAMNLEDWLDSPVRVREVVRHRNHRWRSLWRQIPIPPDTRNLALEL
+IGGNAP
+>contig_2
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK
+WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS
+QLLNYVRFDD
+>contig_3
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG
+TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
+>contig_5
+XGGPPQTHVKRDPASRGLFHAVVPALGPLRDARREGRFGRCRVMPGPDIIKEHSVVPTLAFDDVLGRLAKYRRRASGAMN
+PGDASQVTRGAAGESLLALAPSALVLGEEGPTADVEPDDGRFEQVRSGKGHGKGDTLT
+>contig_7
+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG
+NESGEISGKKNTRKGKGDX
+>contig_9
+XLRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGR
+TAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAVX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="${__HERE__}/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Thu Jun 08 12:48:23 2017 -0400
@@ -0,0 +1,80 @@
+import os
+import shutil
+import subprocess
+import sys
+
+FSTDERR = 'stderr.txt'
+FSTDOUT = 'stdout.txt'
+
+
+def check_execution_errors(rc, fstderr, fstdout):
+    if rc != 0:
+        fh = open(fstdout, 'rb')
+        out_msg = fh.read()
+        fh.close()
+        fh = open(fstderr, 'rb')
+        err_msg = fh.read()
+        fh.close()
+        msg = '%s\n%s\n' % (str(out_msg), str(err_msg))
+        stop_err(msg)
+
+
+def get_response_buffers():
+    fstderr = os.path.join(os.getcwd(), FSTDERR)
+    fherr = open(fstderr, 'wb')
+    fstdout = os.path.join(os.getcwd(), FSTDOUT)
+    fhout = open(fstdout, 'wb')
+    return fstderr, fherr, fstdout, fhout
+
+
+def move_directory_files(source_dir, destination_dir, copy=False):
+    source_directory = os.path.abspath(source_dir)
+    destination_directory = os.path.abspath(destination_dir)
+    if not os.path.isdir(destination_directory):
+        os.makedirs(destination_directory)
+    for dir_entry in os.listdir(source_directory):
+        source_entry = os.path.join(source_directory, dir_entry)
+        if copy:
+            shutil.copy(source_entry, destination_directory)
+        else:
+            shutil.move(source_entry, destination_directory)
+
+
+def run_command(cmd):
+    fstderr, fherr, fstdout, fhout = get_response_buffers()
+    proc = subprocess.Popen(args=cmd, stderr=fherr, stdout=fhout, shell=True)
+    rc = proc.wait()
+    # Check results.
+    fherr.close()
+    fhout.close()
+    check_execution_errors(rc, fstderr, fstdout)
+
+
+def stop_err(msg):
+    sys.exit(msg)
+
+
+def write_html_output(output, title, dir):
+    with open(output, 'w') as fh:
+        dir_items = sorted(os.listdir(dir))
+        # Directories can only contain either files or directories,
+        # but not both.
+        if len(dir_items) > 0:
+            item_path = os.path.join(dir, dir_items[0])
+            if os.path.isdir(item_path):
+                header = 'Directories'
+            else:
+                header = 'Datasets'
+        else:
+            header = ''
+        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><b>%s</th></b>\n' % header)
+        for index, fname in enumerate(dir_items):
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
+        fh.write('</table></body></html>\n')