changeset 0:7000422fbaff draft

Uploaded
author greg
date Thu, 08 Jun 2017 12:51:58 -0400
parents
children f5ae6c86f75f
files .shed.yml gene_family_phylogeny_builder.py gene_family_phylogeny_builder.xml macros.xml plant_tribes_scaffolds.loc.sample tool_data_table_conf.xml.sample utils.py
diffstat 7 files changed, 538 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,14 @@
+name: plant_tribes_gene_family_phylogeny_builder
+owner: greg
+description: |
+  Contains a tool that creates multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups.
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  This tool is one of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of
+  complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic
+  analyses by creating multiple sequence alignments and inferred maximum likelihood phylogenies for orthogroups produced by the
+  GeneFamilyAligner tool.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_phylogeny_builder
+type: unrestricted
+categories:
+- Phylogenetics
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_phylogeny_builder.py	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+import argparse
+import os
+
+try:
+    from shlex import quote
+except ImportError:  # Python 2
+    from pipes import quote
+
+import utils
+
+OUTPUT_DIR = 'geneFamilyPhylogenies_dir'
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--alignment_type', dest='alignment_type', help='Input alignments type produced by the GeneFamilyAligner')
+parser.add_argument('--bootstrap_replicates', dest='bootstrap_replicates', type=int, default=None, help='Number of replicates for rapid bootstrap analysis')
+parser.add_argument('--config_dir', dest='config_dir', help='Directory containing default configuration files')
+parser.add_argument('--max_orthogroup_size', dest='max_orthogroup_size', type=int, help='Maximum number of sequences in orthogroup alignments')
+parser.add_argument('--method', dest='method', help='Protein clustering method')
+parser.add_argument('--min_orthogroup_size', dest='min_orthogroup_size', type=int, help='Minimum number of sequences in orthogroup alignments')
+parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of threads to use for execution')
+parser.add_argument('--orthogroup_aln', dest='orthogroup_aln', help='Input dataset files_path')
+parser.add_argument('--output_phylip', dest='output_phylip', default=None, help='Output for orthogroup phylip multiple sequence alignments')
+parser.add_argument('--output_phylip_dir', dest='output_phylip_dir', default=None, help='output_phylip.files_path')
+parser.add_argument('--output_tree', dest='output_tree', default=None, help='Output for phylogenetic trees')
+parser.add_argument('--output_tree_dir', dest='output_tree_dir', default=None, help='output_tree.files_path')
+parser.add_argument('--rooting_order', dest='rooting_order', default=None, help='Rooting order configuration for rooting trees')
+parser.add_argument('--scaffold', dest='scaffold', help='Orthogroups or gene families proteins scaffold')
+parser.add_argument('--sequence_type', dest='sequence_type', help='Sequence type used in the phylogenetic inference')
+parser.add_argument('--tree_inference', dest='tree_inference', help='Phylogenetic trees inference method')
+
+args = parser.parse_args()
+
+# Build the command line.
+cmd = 'GeneFamilyPhylogenyBuilder'
+cmd += ' --alignment_type %s' % args.alignment_type
+if args.bootstrap_replicates is not None:
+    cmd += ' --bootstrap_replicates %d' % args.bootstrap_replicates
+cmd += ' --config_dir %s' % args.config_dir
+cmd += ' --max_orthogroup_size %d' % args.max_orthogroup_size
+cmd += ' --method %s' % args.method
+cmd += ' --min_orthogroup_size %d' % args.min_orthogroup_size
+cmd += ' --num_threads %d' % args.num_threads
+cmd += ' --orthogroup_aln %s' % args.orthogroup_aln
+if args.rooting_order is not None:
+    cmd += ' --rooting_order %s' % args.rooting_order
+cmd += ' --scaffold %s' % args.scaffold
+cmd += ' --sequence_type %s' % args.sequence_type
+cmd += ' --tree_inference %s' % args.tree_inference
+
+# Run the command.
+utils.run_command(cmd)
+
+# Handle outputs.
+if args.output_phylip is not None and args.output_phylip_dir is not None:
+    src_output_dir = os.path.join(OUTPUT_DIR, 'phylip_aln')
+    utils.move_directory_files(src_output_dir, args.output_phylip_dir)
+    utils.write_html_output(args.output_phylip, 'Orthogroup phylip multiple sequence alignments', args.output_phylip_dir)
+if args.output_tree is not None and args.output_tree_dir is not None:
+    src_output_dir = os.path.join(OUTPUT_DIR, 'orthogroups_tree')
+    utils.move_directory_files(src_output_dir, args.output_tree_dir, copy=True)
+    utils.write_html_output(args.output_tree, 'Phylogenetic trees', args.output_tree_dir)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_phylogeny_builder.xml	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,246 @@
+<tool id="plant_tribes_gene_family_phylogeny_builder" name="GeneFamilyPhylogenyBuilder" version="@WRAPPER_VERSION@.0">
+    <description>builds phylogenetic trees of aligned orthologous gene family clusters</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements_gene_family_phylogeny_builder" />
+    <command detect_errors="exit_code"><![CDATA[
+#set input_format = $input_format_cond.input_format
+#set tree_inference = $tree_inference_cond.tree_inference
+#if str($tree_inference) == 'raxml':
+    #set rooting_order_file_cond = $tree_inference_cond.rooting_order_file_cond
+    #set rooting_order_file = $rooting_order_file_cond.rooting_order_file
+    #if str($rooting_order_file) == 'yes':
+        #set rooting_order = $rooting_order_file_cond.rooting_order
+    #end if
+    #set bootstrap_replicates = $tree_inference_cond.bootstrap_replicates
+#end if
+
+python $__tool_directory__/gene_family_phylogeny_builder.py
+#if str($input_format) == 'ptalign':
+    --orthogroup_aln '$input_format_cond.input_ptalign.extra_files_path'
+    --alignment_type 'aln'
+    --sequence_type 'protein'
+#else if str($input_format) == 'ptalignca':
+    --orthogroup_aln '$input_format_cond.input_ptalignca.extra_files_path'
+    --alignment_type 'aln'
+    --sequence_type $input_format_cond.sequence_type
+#else if str($input_format) == 'ptalignfiltered':
+    --orthogroup_aln '$input_format_cond.input_ptalignfiltered.extra_files_path'
+    --alignment_type 'filter'
+    --sequence_type 'protein'
+#else if str($input_format) == 'ptalignfilteredca':
+    --orthogroup_aln '$input_format_cond.input_ptalignfilteredca.extra_files_path'
+    --alignment_type 'filter'
+    --sequence_type $input_format_cond.sequence_type
+#else if str($input_format) == 'ptaligntrimmed':
+    --orthogroup_aln '$input_format_cond.input_ptaligntrimmed.extra_files_path'
+    --alignment_type 'trim'
+    --sequence_type 'protein'
+#else if str($input_format) == 'ptaligntrimmedca':
+    --orthogroup_aln '$input_format_cond.input_ptaligntrimmedca.extra_files_path'
+    --alignment_type 'trim'
+    --sequence_type $input_format_cond.sequence_type
+#end if
+--scaffold '$scaffold.fields.path'
+--config_dir '$scaffold.fields.path'
+--method $method
+--tree_inference $tree_inference
+#if str($tree_inference) == 'raxml':
+    #if str($rooting_order_file) == 'yes':
+        --rooting_order '$rooting_order'
+        ## No else block needed here because the default rooting_order
+        ## configuration will be used if the --rooting_order flag is missing.
+    #end if
+    --bootstrap_replicates $bootstrap_replicates
+    --output_phylip '$output_phylip'
+    --output_phylip_dir '$output_phylip.files_path'
+#end if
+--min_orthogroup_size $min_orthogroup_size
+--max_orthogroup_size $max_orthogroup_size
+--num_threads \${GALAXY_SLOTS:-4}
+#set output_pttree_dir = $output_pttree_dir_cond.output_pttree_dir
+#if str($output_pttree_dir) == 'yes':
+    --output_tree '$output_tree'
+    --output_tree_dir '$output_tree.files_path'
+#end if
+    ]]></command>
+    <inputs>
+        <conditional name="input_format_cond">
+            <param name="input_format" type="select" label="Orthogroup alignments">
+                <option value="ptalign">Proteins orthogroup alignments</option>
+                <option value="ptalignca">Protein and coding sequences orthogroup alignments</option>
+                <option value="ptalignfiltered">Filtered proteins orthogroup alignments</option>
+                <option value="ptalignfilteredca">Filtered protein and coding sequences orthogroup alignments</option>
+                <option value="ptaligntrimmed">Trimmed proteins orthogroup alignments</option>
+                <option value="ptaligntrimmedca">Trimmed protein and coding sequences orthogroup alignments</option>
+            </param>
+            <when value="ptalign">
+                <param name="input_ptalign" format="ptalign" type="data" label="Proteins orthogroup alignments">
+                    <validator type="empty_extra_files_path" />
+                </param>
+            </when>
+            <when value="ptalignca">
+                <param name="input_ptalignca" format="ptalignca" type="data" label="Protein and coding sequences orthogroup alignments">
+                    <validator type="empty_extra_files_path" />
+                </param>
+                <expand macro="param_sequence_type" />
+            </when>
+            <when value="ptalignfiltered">
+                <param name="input_ptalignfiltered" format="ptalignfiltered" type="data" label="Filtered proteins orthogroup alignments">
+                    <validator type="empty_extra_files_path" />
+                </param>
+            </when>
+            <when value="ptalignfilteredca">
+                <param name="input_ptalignfilteredca" format="ptalignfilteredca" type="data" label="Filtered protein and coding sequences orthogroup alignments">
+                    <validator type="empty_extra_files_path" />
+                </param>
+                <expand macro="param_sequence_type" />
+            </when>
+            <when value="ptaligntrimmed">
+                <param name="input_ptaligntrimmed" format="ptaligntrimmed" type="data" label="Trimmed proteins orthogroup alignments">
+                    <validator type="empty_extra_files_path" />
+                </param>
+            </when>
+            <when value="ptaligntrimmedca">
+                <param name="input_ptaligntrimmedca" format="ptaligntrimmedca" type="data" label="Trimmed protein and coding sequences orthogroup alignments">
+                    <validator type="empty_extra_files_path" />
+                </param>
+                <expand macro="param_sequence_type" />
+            </when>
+        </conditional>
+        <expand macro="param_scaffold" />
+        <expand macro="param_method" />
+        <conditional name="tree_inference_cond">
+            <param name="tree_inference" type="select" label="Phylogenetic inference method">
+                <option value="raxml" selected="true">RAxML</option>
+                <option value="fasttree">FastTree</option>
+            </param>
+            <when value="raxml">
+                <conditional name="rooting_order_file_cond">
+                    <param name="rooting_order_file" type="select" label="Rooting order configuration">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="rooting_order" format="txt" type="data" label="Rooting order configuration" />
+                    </when>
+                </conditional>
+                <param name="bootstrap_replicates" type="integer" value="100" min="0" label="Bootstrap replicates" />
+            </when>
+            <when value="fasttree" />
+        </conditional>
+        <param name="max_orthogroup_size" type="integer" value="100" min="4" label="Maximum orthogroup size" />
+        <param name="min_orthogroup_size" type="integer" value="4" min="4" label="Minimum orthogroup size" />
+        <conditional name="output_pttree_dir_cond">
+            <param name="output_pttree_dir" type="select" label="Output additional directory of phylogenetic tree files?">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <collection name="tree" type="list" label="${tool.name} (phylogenetic trees, dataset collection) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyPhylogenies_dir/orthogroups_tree" format="nhx" />
+        </collection>
+        <data name="output_tree" format="pttree" label="${tool.name} (phylogenetic trees, directory) on ${on_string}">
+            <filter>output_pttree_dir_cond['output_pttree_dir'] == 'yes'</filter>
+        </data>
+        <data name="output_phylip" format="ptphylip" label="${tool.name} (orthogroup phylip multiple sequence alignments) on ${on_string}">
+            <filter>tree_inference_cond['tree_inference'] == 'raxml'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.
+        <test>
+        </test>
+        -->
+    </tests>
+    <help>
+This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
+analyses of genome-scale gene families and transcriptomes.  This tool performs gene family phylogenetic inference of
+multiple sequence alignments produced by the GeneFamilyAligner tool.
+
+-----
+
+**Required options**
+
+ * **Orthogroup alignments** - orthogroup alignment fasta files produced by the GeneFamilyAligner tool selected from your history.  Depending on how the GeneFamilyAligner tool was executed, these could either be pre-processed alignments, trimmed alignments or both trimmed and filtered alignments.
+ * **Gene family scaffold** - one of the PlantTribes gene family scaffolds installed into Galaxy by the PlantTribes Scaffolds Download Data Manager tool.
+ * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
+ * **Phylogenetic inference method** - method for estimating orthogroup maximum likelihood (ML) phylogenetic trees.  PlantTribes estimates ML phylogenetic trees using either RAxML or FastTree algorithms.
+
+**Other options**
+
+ * **Rooting order configuration** - select 'Yes' to enable selection of a rooting order configuration file for RAxML.  Scaffold configuration templates (.rootingOrder.config) showing how to customize the RAxML ML tree rooting order can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and are also available at the PlantTribes GitHub `repository`_.  Phylogenetic tree rooting order settings shown in these templates are used as defaults if 'No' is selected.
+ * **Bootstrap replicates** - number of bootstrap replicates for RAxML to conduct a rapid bootstrap analysis and search for the best-scoring ML tree (default = 100).
+ * **Maximum orthogroup size** - maximum number of sequences allowed in orthogroup alignments (default = 100).
+ * **Minimum orthogroup size** - minimum number of sequences allowed in orthogroup alignments (default = 4).
+ * **Output additional directory of phylogenetic tree files** - selecting 'Yes' will produce an additional output that has an associated directory of files that are copies of the elements of the phylogenetic trees output dataset collection (this output will be used for future downstream tools).
+
+.. _repository: https://github.com/dePamphilis/PlantTribes/tree/master/config
+
+    </help>
+    <citations>
+        <expand macro="citation1" />
+        <citation type="bibtex">
+            @article{Wall2008,
+            journal = {Nucleic Acids Research},
+            author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
+            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
+            year = {2008},
+            volume = {36},
+            number = {suppl 1},
+            pages = {D970-D976},}
+        </citation>
+        <citation type="bibtex">
+            @article{Sasidharan2012,
+            journal = {Nucleic Acids Research},
+            author = {3. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+            title = {GFam: a platform for automatic annotation of gene families},
+            year = {2012},
+            pages = {gks631},}
+        </citation>
+        <citation type="bibtex">
+            @article{Li2003,
+            journal = {Genome Research},
+            author = {4. Li L, Stoeckert CJ, Roos DS},
+            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+            year = {2003},
+            volume = {13},
+            number = {9},
+            pages = {2178-2189},}
+        </citation>
+        <citation type="bibtex">
+            @article{Emms2015,
+            journal = {Genome Biology},
+            author = {5. Emms DM, Kelly S},
+            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+            year = {2015},
+            volume = {16},
+            number = {1},
+            pages = {157},}
+        </citation>
+        <citation type="bibtex">
+            @article{Stamatakis2014,
+            journal = {Bioinformatics},
+            author = {6. Stamatakis A},
+            title = {RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies},
+            year = {2014},
+            url = {http://sco.h-its.org/exelixis/web/software/raxml/index.html},}
+        </citation>
+        <citation type="bibtex">
+            @article{Price2010,
+            journal = {PLoS one},
+            author = {7. Price MN, Dehal PS, Arkin AP},
+            title = {FastTree 2-approximately maximum-likelihood trees for large alignments},
+            year = {2010},
+            volume = {5},
+            number = {3},
+            pages = {e9490},}
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,130 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="requirements_assembly_post_processor">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_assembly_post_processor</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_aligner">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_aligner</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_classifier">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_classifier</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_integrator">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_integrator</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_kaks_analysis">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_kaks_analysis</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_ks_distribution">
+        <requirements>
+            <requirement type="package" version="1.3.0">r-optparse</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_phylogeny_builder">
+        <requirements>
+            <requirement type="package" version="1.0.0">plant_tribes_gene_family_phylogeny_builder</requirement>
+        </requirements>
+    </xml>
+    <xml name="param_codon_alignments">
+        <param name="codon_alignments" type="select" label="Codon alignments">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <xml name="param_options_type">
+        <param name="options_type" type="select" label="Options Configuration">
+            <option value="basic" selected="true">Basic</option>
+            <option value="advanced">Advanced</option>
+        </param>
+    </xml>
+    <xml name="param_orthogroup_fna">
+        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Gene family scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <xml name="param_sequence_type">
+        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
+            <option value="protein" selected="true">Amino acid based</option>
+            <option value="dna">Nucleotide based</option>
+        </param>
+    </xml>
+    <xml name="cond_alignment_method">
+        <conditional name="alignment_method_cond">
+            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
+                <option value="mafft" selected="true">MAFFT</option>
+                <option value="pasta">PASTA</option>
+            </param>
+            <when value="mafft" />
+            <when value="pasta">
+                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="cond_remove_gappy_sequences">
+        <conditional name="remove_gappy_sequences_cond">
+            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <conditional name="trim_type_cond">
+                    <param name="trim_type" type="select" label="Trimming method">
+                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
+                        <option value="automated_trimming">Automated heuristic trimming</option>
+                    </param>
+                    <when value="gap_trimming">
+                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
+                    </when>
+                    <when value="automated_trimming" />
+                </conditional>
+                <conditional name="remove_sequences_with_gaps_cond">
+                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
+                        <option value="no" selected="true">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
+                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value	Name	Path	Description
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Thu Jun 08 12:51:58 2017 -0400
@@ -0,0 +1,80 @@
+import os
+import shutil
+import subprocess
+import sys
+
+FSTDERR = 'stderr.txt'
+FSTDOUT = 'stdout.txt'
+
+
+def check_execution_errors(rc, fstderr, fstdout):
+    if rc != 0:
+        fh = open(fstdout, 'rb')
+        out_msg = fh.read()
+        fh.close()
+        fh = open(fstderr, 'rb')
+        err_msg = fh.read()
+        fh.close()
+        msg = '%s\n%s\n' % (str(out_msg), str(err_msg))
+        stop_err(msg)
+
+
+def get_response_buffers():
+    fstderr = os.path.join(os.getcwd(), FSTDERR)
+    fherr = open(fstderr, 'wb')
+    fstdout = os.path.join(os.getcwd(), FSTDOUT)
+    fhout = open(fstdout, 'wb')
+    return fstderr, fherr, fstdout, fhout
+
+
+def move_directory_files(source_dir, destination_dir, copy=False):
+    source_directory = os.path.abspath(source_dir)
+    destination_directory = os.path.abspath(destination_dir)
+    if not os.path.isdir(destination_directory):
+        os.makedirs(destination_directory)
+    for dir_entry in os.listdir(source_directory):
+        source_entry = os.path.join(source_directory, dir_entry)
+        if copy:
+            shutil.copy(source_entry, destination_directory)
+        else:
+            shutil.move(source_entry, destination_directory)
+
+
+def run_command(cmd):
+    fstderr, fherr, fstdout, fhout = get_response_buffers()
+    proc = subprocess.Popen(args=cmd, stderr=fherr, stdout=fhout, shell=True)
+    rc = proc.wait()
+    # Check results.
+    fherr.close()
+    fhout.close()
+    check_execution_errors(rc, fstderr, fstdout)
+
+
+def stop_err(msg):
+    """Terminate the process by passing msg to sys.exit()."""
+    sys.exit(msg)
+
+
+def write_html_output(output, title, dir):
+    with open(output, 'w') as fh:
+        dir_items = sorted(os.listdir(dir))
+        # Directories can only contain either files or directories,
+        # but not both.
+        if len(dir_items) > 0:
+            item_path = os.path.join(dir, dir_items[0])
+            if os.path.isdir(item_path):
+                header = 'Directories'
+            else:
+                header = 'Datasets'
+        else:
+            header = ''
+        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><b>%s</th></b>\n' % header)
+        for index, fname in enumerate(dir_items):
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
+        fh.write('</table></body></html>\n')