Repository 'velvet_toolsuite'
hg clone https://toolshed.g2.bx.psu.edu/repos/edward-kirton/velvet_toolsuite

Changeset 0:4afe13ac23b6 (2011-06-07)
Commit message:
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
added:
velvet/jgi_assembly.py
velvet/suite_config.xml
velvet/velvetg_jgi.xml
velvet/velvetg_jgi_wrapper.pl
velvet/velveth_jgi.xml
velvet/velveth_jgi_wrapper.pl
b
diff -r 000000000000 -r 4afe13ac23b6 velvet/jgi_assembly.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/jgi_assembly.py Tue Jun 07 17:52:16 2011 -0400
[
@@ -0,0 +1,117 @@
+"""
+Assembly classes
+"""
+
+import data
+import logging
+import re
+import string
+from cgi import escape
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+import galaxy.model
+from galaxy import util
+from sniff import *
+
+log = logging.getLogger(__name__)
+
+class Assembly( data.Text ):
+    """Class describing an assembly"""
+
+    """Add metadata elements"""
+    MetadataElement( name="contigs", default=0, desc="Number of contigs", readonly=True, visible=False, optional=True, no_value=0 )
+    MetadataElement( name="reads", default=0, desc="Number of reads", readonly=True, visible=False, optional=True, no_value=0 )
+
+
+class Ace(Assembly):
+    """Class describing an assembly Ace file"""
+
+    file_ext = "ace"
+
+#    def init_meta( self, dataset, copy_from=None ):
+#        Assembly.init_meta( self, dataset, copy_from=copy_from )
+
+    def set_meta( self, dataset, overwrite=True, **kwd ):
+        """
+        Set the number of assembled contigs and read sequences and the number of data lines in dataset.
+        """
+        contigs = 0
+        reads = 0
+        for line in file( dataset.file_name ):
+            line = line.strip()
+            if line and line.startswith( '#' ):
+                # Don't count comment lines
+                continue
+            if line and line.startswith( 'CO' ):
+                contigs += 1
+            if line and line.startswith( 'RD' ):
+                reads += 1
+        dataset.metadata.contigs = contigs
+        dataset.metadata.reads = reads
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            if dataset.metadata.contigs:
+                dataset.blurb = "%s contigs" % util.commaify( str( dataset.metadata.contigs ) )
+            else:
+                dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff( self, filename ):
+        """
+        Determines whether the file is in ace format
+
+        An ace file contains these sections 
+        AS  \d+ \d+
+
+        CO \S+ \d+ \d+ \d+ \w
+        [atcgATCGN\*]+
+
+        BQ
+        [\d\s]+
+
+        AF \S+ [CU] \-?\d+
+
+        BS \d+ \d+ \S+
+
+        RD \S+ \d+ \d+ \d+
+        [ATCGN\*]+
+
+        QA \d+ \d+ \d+ \d+
+        DS .*
+
+        Currently we only check if file begins with AS
+
+        >>> fname = get_test_fname( 'genome.ace' )
+        >>> Ace().sniff( fname )
+        True
+        >>> fname = get_test_fname( 'genome.fasta' )
+        >>> Ace().sniff( fname )
+        False
+        """
+
+        try:
+            fh = open( filename )
+            line = fh.readline()
+            line = line.strip()
+            if line:
+                if line.startswith( 'AS ' ):
+                    fh.close()
+                    return True
+            fh.close()
+            return False
+        except:
+            pass
+        return False
+
+class Velveth(Assembly):
+    composite_type='basic'
+    file_ext = "txt"
+
+    def __init__(self,**kwd):
+        Assembly.__init__(self,**kwd)
+        self.add_composite_file('Roadmap')
+        self.add_composite_file('Sequences')
b
diff -r 000000000000 -r 4afe13ac23b6 velvet/suite_config.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/suite_config.xml Tue Jun 07 17:52:16 2011 -0400
b
@@ -0,0 +1,9 @@
+<suite id='velvet_toolsuite' name='Suite of Velvet assembler tools' version="1.0.0">
+ <!-- Suite manifest: lists the two tools of this suite, velveth_jgi and
+      velvetg_jgi, each at version 1.0.0. -->
+ <description>Velvet assembler (a different version than in galaxy-central)</description>
+ <tool id="velveth_jgi" name="velveth" version="1.0.0">
+ <description>Hash reads</description>
+ </tool>
+ <tool id="velvetg_jgi" name="velvetg" version="1.0.0">
+ <description>Generate contigs</description>
+ </tool>
+</suite>
b
diff -r 000000000000 -r 4afe13ac23b6 velvet/velvetg_jgi.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velvetg_jgi.xml Tue Jun 07 17:52:16 2011 -0400
[
b'@@ -0,0 +1,134 @@\n+<tool id="velvetg_jgi" name="velvetg" version="1.0.0">\n+<description>Assemble preprocessed reads</description>\n+<command interpreter=\'perl\'>velvetg_jgi_wrapper.pl\n+$velveth.extra_files_path\n+$velvetg_outfile.extra_files_path\n+$velvetg_outfile\n+$contigs_outfile\n+$stats_outfile\n+$lastgraph_outfile\n+$unused_reads_outfile\n+$amos_outfile\n+velvetg $velvetg_outfile.extra_files_path\n+-very_clean yes\n+#if $ins_length != \'\':\n+-ins_length $ins_length\n+    #if $ins_length_sd != \'\':\n+    -ins_length_sd $ins_length_sd\n+    #end if\n+#end if\n+#if $ins_length2 != \'\':\n+-ins_length2 $ins_length2\n+    #if $ins_length2_sd != \'\':\n+    -ins_length2_sd $ins_length2_sd\n+    #end if\n+#end if\n+#if $ins_length3 != \'\':\n+-ins_length3 $ins_length3\n+    #if $ins_length3_sd != \'\':\n+    -ins_length3_sd $ins_length3_sd\n+    #end if\n+#end if\n+#if $ins_length4 != \'\':\n+-ins_length4 $ins_length4\n+    #if $ins_length4_sd != \'\':\n+    -ins_length4_sd $ins_length4_sd\n+    #end if\n+#end if\n+#if $ins_length5 != \'\':\n+-ins_length5 $ins_length5\n+    #if $ins_length5_sd != \'\':\n+    -ins_length5_sd $ins_length5_sd\n+    #end if\n+#end if\n+#if $ins_length_long != \'\':\n+-ins_length_long $ins_length_long\n+    #if $ins_length_long_sd != \'\':\n+    -ins_length_long_sd $ins_length_long_sd\n+    #end if\n+#end if\n+-exp_cov $exp_cov\n+-cov_cutoff $cov_cutoff\n+-long_cov_cutoff $long_cov_cutoff\n+-max_coverage $max_coverage\n+-min_contig_lgth $min_contig_lgth\n+-read_trkg $read_trkg\n+-amos_file $amos_file\n+-unused_reads $unused_reads\n+-max_branch_length $max_branch_length\n+-max_divergence $max_divergence\n+-max_gap_count $max_gap_count\n+-scaffolding $scaffolding\n+-long_mult_cutoff $long_mult_cutoff\n+-min_pair_count $min_pair_count\n+-alignments $alignments\n+-exportFiltered $exportFiltered\n+</command>\n+<inputs>\n+    <param name="velveth" type="data" format="velveth" label="velvet hash"/>\n+\n+    <param name="ins_length" 
type="text" value=\'auto\' label="[-ins_length] Insert length (bp) of short library" help="blank=no read pairing; auto=infer; or supply value (integer)" />\n+    <param name="ins_length_sd" type="text" value=\'auto\' label="[ins_length_sd] Insert length standard deviation (bp) of short library; requires above" help="blank=default of 10% of corresponding length; auto=infer; or supply value (integer)" />\n+    <param name="ins_length2" type="text" value=\'auto\' label="[ins_length2] Insert length (bp) of short2 library" help="blank=no read pairing; auto=infer; or supply value (integer)" />\n+    <param name="ins_length2_sd" type="text" value=\'auto\' label="[-ins_length2_sd] Insert length standard deviation (bp) of short2 library; requires above" help="blank=default of 10% of corresponding length; auto=infer; or supply value (integer)" />\n+    <param name="ins_length3" type="text" value=\'auto\' label="[ins_length3] Insert length (bp) of short3 library" help="blank=no read pairing; auto=infer; or supply value (integer)" />\n+    <param name="ins_length3_sd" type="text" value=\'auto\' label="[-ins_length3_sd] Insert length standard deviation (bp) of short3 library; requires above" help="blank=default of 10% of corresponding length; auto=infer; or supply value (integer)" />\n+    <param name="ins_length4" type="text" value=\'auto\' label="[ins_length4] Insert length (bp) of short4 library" help="blank=no read pairing; auto=infer; or supply value (integer)" />\n+    <param name="ins_length4_sd" type="text" value=\'auto\' label="[-ins_length4_sd] Insert length standard deviation (bp) of short4 library; requires above" help="blank=default of 10% of corresponding length; auto=infer; or supply value (integer)" />\n+    <param name="ins_length5" type="text" value=\'auto\' label="[ins_length5] Insert length (bp) of short5 library" help="blank=no read pairing; auto=infer; or supply value (integer)" />\n+    <param name="ins_length5_sd" type="text" value=\'auto\' 
label="[-ins_length5_sd] Insert length standard deviation (bp) of short5 library; requires above" help="blank=default of 10% of corresponding le'..b'ins_length_long_sd" type="text" value=\'auto\' label="[-ins_length_sd_long] Insert length standard deviation (bp) of long library; requires above" help="blank=default of 10% of corresponding length; auto=infer; or supply value (integer)" />\n+\n+    <param name=\'exp_cov\' type=\'text\' value=\'-1\' label=\'[-exp_cov] Expected short read k-mer coverage\' help=\'-1=no long or paired-end read resolution; auto=infer it; or supply value (real number)\' />\n+    <param name="cov_cutoff" type="text" value=\'-1\' label="[-cov_cutoff] Removal of low coverage nodes AFTER tour bus" help="-1=no removal; auto=infer cutoff; or specify cutoff (real number)" />\n+    <param name="long_cov_cutoff" type="float" value=\'-1\' label="[-long_cov_cutoff] Removal of low long-read coverage nodes AFTER tour bus" help="-1=no removal; or specify cutoff (real number)" />\n+    <param name=\'max_coverage\' type=\'float\' value=\'-1\' label="[-max_coverage] Exclude highly covered data from your assembly (e.g. 
plasmid, mitochondrial, and chloroplast sequences)" help=\'-1 for default: no removal\' />\n+    <param name="min_contig_lgth" type="integer" value=\'-1\' label="Minimum contig length" help="-1 for default: hash length *2" />\n+    <param name="scaffolding" type="boolean" checked=\'true\' label="Scaffolding" truevalue=\'yes\' falsevalue=\'no\'/>\n+\n+    <!-- Tour Bus Parameters -->\n+    <param name="max_branch_length" type="integer" value=\'100\' label="Maximum branch length" help="Default recommended"/>\n+    <param name="max_divergence" type="float" value=\'0.20\' label="Maximum divergence rate" help="Default recommended"/>\n+    <param name="max_gap_count" type="integer" value=\'3\' label="Maximum gap count" help="Default recommended"/>\n+\n+    <!-- Rock Band Parameters -->\n+    <param name=\'long_mult_cutoff\' type="integer" value=\'2\' label="Minimum long read connection cutoff"/> \n+\n+    <!-- Pebble Parameters -->\n+    <param name="min_pair_count" type="integer" value=\'10\' label="Minimum Read-Pair Validation"/>\n+\n+    <!-- Output Parameters -->\n+    <param name=\'unused_reads\' type=\'boolean\' truevalue=\'yes\' falsevalue=\'no\' checked=\'true\' label=\'Export unused reads\' />\n+    <param name=\'read_trkg\' type=\'boolean\' truevalue=\'yes\' falsevalue=\'no\' checked=\'false\' label="[-read_trkg] tracking of short read positions in assembly" help="This will cost slightly more memory and calculation time, but will have the advantage of producing in the end a more detailed description of the assembly.  The additional information will be included in the LastGraph file and optionally the Amos assembly file (below)." 
/>\n+    <param name=\'amos_file\' type=\'boolean\' truevalue=\'yes\' falsevalue=\'no\' checked=\'false\' label="[-amos_file] export assembly to AMOS file" help="If selecting this option, you may also want to select -read_trkg (above)" />\n+    <param name="alignments" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="[-alignments] export a summary of contig alignment to the reference sequences" />\n+    <param name="exportFiltered" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="[-exportFiltered] export the long nodes which were eliminated by the coverage filters" />\n+</inputs>\n+\n+<outputs>\n+    <data name="velvetg_outfile" format="txt" />\n+    <data name="contigs_outfile" format="fasta" label=\'Contigs\' />\n+    <data name="stats_outfile" format="tabular" label=\'Contig Stats\' />\n+    <data name="lastgraph_outfile" format="txt" label="LastGraph" />\n+    <data name=\'unused_reads_outfile\' format=\'fasta\' label=\'Unused Reads\'>\n+        <filter>unused_reads is True</filter>\n+    </data>\n+    <data name=\'amos_outfile\' format=\'txt\' label=\'Amos Assembly\'>\n+        <filter>amos_file is True</filter>\n+    </data>\n+</outputs>\n+\n+<help>\n+\n+**What it does**\n+\n+This tool assembles contigous sequences from preprocessed reads by constructing a de Bruijn graph.\n+\n+Sequences must be preprocessed by the velveth tool.\n+\n+**Manual**\n+\n+http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf\n+</help>\n+</tool>\n'
b
diff -r 000000000000 -r 4afe13ac23b6 velvet/velvetg_jgi_wrapper.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velvetg_jgi_wrapper.pl Tue Jun 07 17:52:16 2011 -0400
b
@@ -0,0 +1,66 @@
+#!/usr/bin/env perl
+
+# Convenience wrapper for velvetg; copies outfiles to galaxy-specified destinations.
+# Please email bugs/feature requests to Edward Kirton (ESKirton@LBL.gov)
+#
+# Usage:
+#   velvetg_jgi_wrapper.pl <velveth dir> <velvetg dir> <log out> <contigs out>
+#       <stats out> <lastgraph out> <unused reads out> <amos out> <velvetg command...>
+#
+# The two optional outputs (unused reads, amos) may be given as the literal
+# string 'None', in which case they are skipped.
+#
+# History:
+# - 2010/03/04 : file created
+# - 2001/02/05 : added new options, outfiles; renamed to velvetg_jgi to avoid collision with the other velvetg tool
+
+use strict;
+use warnings;
+use File::Copy;
+
+# shift wrapper args
+my $velveth_path=shift @ARGV or die("Missing argument: velveth folder\n");
+my $velvetg_path=shift @ARGV or die("Missing argument: velvetg folder\n");
+my $velvetg_outfile=shift @ARGV or die("Missing argument: velvetg log outfile\n");
+my $contigs_outfile=shift @ARGV or die("Missing argument: contigs outfile\n");
+my $stats_outfile=shift @ARGV or die("Missing argument: stats outfile\n");
+my $lastgraph_outfile=shift @ARGV or die("Missing argument: LastGraph outfile\n");
+my $unused_reads_outfile=shift @ARGV or die("Missing argument: unused reads outfile\n");
+my $amos_outfile=shift @ARGV or die("Missing argument: amos outfile\n");
+die("Missing velvetg command\n") unless @ARGV;
+
+# setup velvetg folder
+die("Velveth folder does not exist: $velveth_path\n") unless -d $velveth_path;
+-d $velvetg_path or mkdir($velvetg_path) or die("Unable to create output folder, $velvetg_path: $!\n");
+for my $name (qw(Sequences Roadmaps)) {
+    my $source = "$velveth_path/$name";
+    my $link = "$velvetg_path/$name";
+    die("velveth $name file does not exist: $source") unless -f $source;
+    # Replace a symlink left behind by an earlier run; anything else that
+    # already exists at the link path is left alone and used as-is.
+    unlink($link) if -l $link;
+    unless (-e $link) {
+        symlink($source, $link) or die("Unable to link $source to $link: $!\n");
+    }
+}
+die("velveth Log file does not exist: $velveth_path/Log") unless -f "$velveth_path/Log";
+copy("$velveth_path/Log", "$velvetg_path/Log")
+    or die("Unable to copy $velveth_path/Log to $velvetg_path/Log: $!\n");
+
+# run command (remaining args, starting with exe path)
+# List-form pipe open hands the arguments to velvetg as they were received,
+# instead of re-joining them into a string for a shell to parse again.
+open(my $velvetg_fh, '-|', @ARGV) or die("Unable to run velvetg: $!\n");
+open(my $log_fh, '>', $velvetg_outfile) or die("Unable to open outfile, $velvetg_outfile: $!\n");
+while (my $line = <$velvetg_fh>) {
+    print {$log_fh} $line;
+    # Echo the summary line to stdout.
+    print $line if $line =~ /^Final graph/;
+}
+close($log_fh) or die("Unable to write outfile, $velvetg_outfile: $!\n");
+unless (close($velvetg_fh)) {
+    # close() on a pipe fails with $! == 0 when the child exited non-zero.
+    die($! ? "Error closing velvetg pipe: $!\n" : "velvetg failed (wait status $?)\n");
+}
+
+# process output
+deliver("$velvetg_path/contigs.fa", $contigs_outfile);
+deliver("$velvetg_path/stats.txt", $stats_outfile);
+
+if ( -f "$velvetg_path/LastGraph") {
+    deliver("$velvetg_path/LastGraph", $lastgraph_outfile);
+} elsif ( -f "$velvetg_path/Graph2") {
+    deliver("$velvetg_path/Graph2", $lastgraph_outfile);
+} else {
+    unlink($lastgraph_outfile);
+    open(my $err_fh, '>', $lastgraph_outfile) or die($!);
+    print {$err_fh} "ERROR: $velvetg_path/LastGraph not found!\n";
+    close($err_fh) or die($!);
+}
+
+# Optional outputs: skipped when the destination is 'None' or when velvetg
+# did not write the file (e.g. the matching option was set to 'no').
+if ( $unused_reads_outfile ne 'None' and -f "$velvetg_path/UnusedReads.fa" ) {
+    deliver("$velvetg_path/UnusedReads.fa", $unused_reads_outfile);
+}
+if ( $amos_outfile ne 'None' and -f "$velvetg_path/velvet_asm.afg" ) {
+    deliver("$velvetg_path/velvet_asm.afg", $amos_outfile);
+}
+exit;
+
+# Move a velvetg result file onto its destination path, replacing whatever is
+# there; dies if the move fails.
+sub deliver {
+    my ($source, $destination) = @_;
+    unlink($destination);
+    move($source, $destination)
+        or die("Unable to move $source to $destination: $!\n");
+    return;
+}
b
diff -r 000000000000 -r 4afe13ac23b6 velvet/velveth_jgi.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velveth_jgi.xml Tue Jun 07 17:52:16 2011 -0400
b
b'@@ -0,0 +1,276 @@\n+<tool id="velveth_jgi" name="velveth" version="1.0.0">\n+<description>Preprocess sequences for velvet assembly</description>\n+<command interpreter=\'perl\'>velveth_jgi_wrapper.pl $output $output.extra_files_path\n+velveth $output.extra_files_path $hash_length $strand_specific\n+#for $i in $short.files\n+$short.libtype $i.filetype.select $i.filetype.file\n+#end for\n+#for $i in $short2.files\n+$short2.libtype $i.filetype.select $i.filetype.file\n+#end for\n+#for $i in $short3.files\n+$short3.libtype $i.filetype.select $i.filetype.file\n+#end for\n+#for $i in $short4.files\n+$short4.libtype $i.filetype.select $i.filetype.file\n+#end for\n+#for $i in $short5.files\n+$short5.libtype $i.filetype.select $i.filetype.file\n+#end for\n+#for $i in $long.files\n+$long.libtype $i.filetype.select $i.filetype.file\n+#end for\n+</command>\n+<inputs>\n+    <param name="hash_length" type="integer" value=\'21\' label="Hash length. Odd numbers only. Maximum 75."/>\n+    <param name="strand_specific" type=\'boolean\' label="All libraries strand-specific?" 
truevalue=\'-strand_specific\' falsevalue=\'\'/>\n+\n+    <conditional name="short">\n+        <param name="libtype" type="select" label="Short Library Type">\n+            <option value="-shortPaired">Paired</option>\n+            <option value="-short">Unpaired</option>\n+        </param>\n+        <when value="-shortPaired">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    </when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+        <when value="-short">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    </when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+    </conditional>\n+\n+    <conditional name="short2">\n+        <param name="libtype" type="select" label="Short2 Library Type">\n+            <option 
value="-shortPaired2">Paired</option>\n+            <option value="-short2">Unpaired</option>\n+        </param>\n+        <when value="-shortPaired2">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    </when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+        <when value="-short2">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'se'..b'at="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+    </conditional>\n+\n+    <conditional name="short5">\n+        <param name="libtype" type="select" label="Short5 Library Type">\n+            <option value="-shortPaired5">Paired</option>\n+            <option value="-short5">Unpaired</option>\n+        </param>\n+        <when value="-shortPaired5">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    
</when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+        <when value="-short5">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    </when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+    </conditional>\n+\n+    <conditional name="long">\n+        <param name="libtype" type="select" label="Long Library Type">\n+            <option value="-longPaired">Paired</option>\n+            <option value="-long">Unpaired</option>\n+        </param>\n+        <when value="-longPaired">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    </when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                
</conditional>\n+            </repeat>\n+        </when>\n+        <when value="-long">\n+            <repeat name="files" title="Files">\n+                <conditional name="filetype">\n+                    <param name=\'select\' type=\'select\' label=\'File Type\'>\n+                        <option value=\'-fastq\'>Fastq</option>\n+                        <option value=\'-fasta\'>Fasta</option>\n+                    </param>\n+                    <when value="-fasta">\n+                        <param name="file" type="data" format="fasta" label="File"/>\n+                    </when>\n+                    <when value="-fastq">\n+                        <param name="file" type="data" format="fastq" label="File"/>\n+                    </when>\n+                </conditional>\n+            </repeat>\n+        </when>\n+    </conditional>\n+</inputs>\n+<outputs>\n+    <data name="output" format="velveth"/>\n+</outputs>\n+\n+<help>\n+\n+**What it does**\n+\n+This tool preprocesses the sequence data using the specified hash length.  A larger hash length increases sensitivity at the expense of read depth.\n+\n+**Manual**\n+\n+http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf\n+</help>\n+</tool>\n'
b
diff -r 000000000000 -r 4afe13ac23b6 velvet/velveth_jgi_wrapper.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/velvet/velveth_jgi_wrapper.pl Tue Jun 07 17:52:16 2011 -0400
[
@@ -0,0 +1,31 @@
+#!/usr/bin/env perl
+
+# Wrapper for velveth: runs the given command, saves its combined
+# stdout/stderr to the outfile, and reports the number of sequences found
+# together with the elapsed time.
+#
+# Usage:
+#   velveth_jgi_wrapper.pl <outfile> <outdir> <velveth command...>
+#
+# The command is expected to be "velveth <dir> <hash length> ...", so the hash
+# length is the third word of the command.
+
+use strict;
+use warnings;
+my $start=time;
+my $outfile=shift @ARGV;
+# Not used directly by this script (the directory also appears inside the
+# velveth command), but it must still be shifted off.
+my $outdir=shift @ARGV;
+die("Usage: $0 <outfile> <outdir> velveth <dir> <hash length> [options]\n")
+    unless defined $outfile and defined $outdir and @ARGV >= 3;
+my $kmer=$ARGV[2];
+# Only the leading integer is inspected, matching the numeric coercion the
+# modulus test has always applied; a value with no leading digits is rejected.
+my ($kmer_int) = $kmer =~ /\A\s*(\d+)/;
+die ("USER ERROR: Hash length (kmer) must be an integer!\n") unless defined $kmer_int;
+die ("USER ERROR: Hash length (kmer) must be odd!\n") unless $kmer_int % 2;
+my $tot_reads=0;
+
+# Fork and exec velveth directly, rather than through a shell command string,
+# so the arguments are passed on exactly as received.  The child sends stderr
+# down the same pipe as stdout, which is what "2>&1" did.
+my $pid = open(my $velveth_fh, '-|');
+die("Unable to run velveth: $!\n") unless defined $pid;
+if ($pid == 0) {
+    open(STDERR, '>&', \*STDOUT) or die("Unable to redirect stderr: $!\n");
+    exec { $ARGV[0] } @ARGV
+        or die("Unable to run velveth: $!\n");
+}
+
+open(my $out_fh, '>', $outfile) or die("Unable to open outfile, $outfile: $!\n");
+while (my $line = <$velveth_fh>) {
+    print {$out_fh} $line;
+    # Add up every "<n> sequences found" line in the output.
+    if ($line =~ /^\[\d+\.\d+\] (\d+) sequences found/) {
+        $tot_reads += $1;
+    }
+}
+close($out_fh) or die("Unable to write outfile, $outfile: $!\n");
+unless (close($velveth_fh)) {
+    # close() on a pipe fails with $! == 0 when the child exited non-zero.
+    die($! ? "Error closing velveth pipe: $!\n" : "velveth failed (wait status $?)\n");
+}
+die("No reads found\n") unless $tot_reads;
+
+# Report elapsed time as hours / minutes / seconds, omitting zero hr and min.
+my $sec=time-$start;
+my $min=int($sec/60);
+$sec -= ($min*60);
+my $hr=int($min/60);
+$min -= ($hr*60);
+print "$tot_reads processed in";
+print " $hr hr" if $hr;
+print " $min min" if $min;
+print " $sec sec\n";
+exit